# AFF_cleanup
#
# Abbreviates university, institute and subject names in a text file of
# affiliations.
#
# Usage: perl <this script> INPUT_FILE OUTPUT_FILE
#
# The whole input file is read into memory, every rule in @RULES is applied
# to it in order (case-insensitively, globally), and the result is written
# to OUTPUT_FILE.
#
# Notes:
#   * Text is handled as raw bytes (no "use utf8", no encoding layers), so
#     the accented literals below only match input stored in the same
#     encoding as this source file.
#     NOTE(review): presumably both are meant to be UTF-8 -- confirm.
#   * Replacements are written with fixed capitalisation, so a rule whose
#     replacement equals its pattern (e.g. Depts -> Depts) still normalises
#     the case of the matched text.
#   * Rule order matters: a specific rule must run before any more general
#     rule that would consume part of its text.

use strict;
use warnings;

# ---------------------------------------------------------------------------
# Rule constructors. Every rule is a pair [ compiled pattern, replacement ].
# ---------------------------------------------------------------------------

# Rule for a fixed piece of text, matched anywhere (also inside longer words).
sub _literal_rule {
    my ($text, $abbreviation) = @_;
    return [ qr/\Q$text\E/i, $abbreviation ];
}

# Rule for a word stem: matches the stem plus the rest of the word when the
# word is followed by a space or ends a line (LF, CRLF or end of input).
# The separator is only looked at, never consumed, so neighbouring words are
# not glued together and line breaks survive.
sub _stem_rule {
    my ($stem, $abbreviation) = @_;
    return [ qr/\Q$stem\E\w*(?=[ ]|\r?(?:\n|\z))/i, $abbreviation ];
}

# ---------------------------------------------------------------------------
# Abbreviate university names.
# ---------------------------------------------------------------------------
my @UNIVERSITY_RULES = (
    # Spellings that contain a non-ASCII byte or an apostrophe go first:
    # \w* in the generic rule stops at those characters and would leave
    # residue such as a stray accented letter behind.
    [ qr/Université/i,  'Univ' ],
    [ qr/Universitè/i,  'Univ' ],
    [ qr/Università/i,  'Univ' ],
    [ qr/Universita'/i, 'Univ' ],
    [ qr/Universitá/i,  'Univ' ],

    # Longer spelling first, otherwise "Universidade" would become "Unive".
    [ qr/Universidade/i, 'Univ' ],
    [ qr/Universidad/i,  'Univ' ],

    # Generic rule: "Universit" plus the remainder of the word
    # (University, Universitet, Universiteit, Universitat, Universities ...).
    [ qr/Universit\w*/i, 'Univ' ],

    [ qr/Univ\./i, 'Univ' ],

    # \b keeps these from firing inside names such as "Univ Dijon".
    [ qr/Univ degli Studi di\b/i, 'Univ' ],
    [ qr/Univ di\b/i,             'Univ' ],

    # An earlier "Universit.*? di " rule is intentionally not included: it
    # could never match once the generic rule had run, and enabling it would
    # delete everything between "Universit..." and the next " di " on a line.
);

# ---------------------------------------------------------------------------
# Multi-word phrases that contain a connective ("of", "für"). They have to
# be handled before the connectives themselves are stripped.
# ---------------------------------------------------------------------------
my @PHRASE_RULES = (
    _literal_rule( 'Lehrstuhl für ', 'Dept ' ),
    _literal_rule( 'School of ',     'Sch '  ),
    _literal_rule( 'Section of ',    'Sect ' ),
);

# ---------------------------------------------------------------------------
# Drop connectives, quotes and periods.
# ---------------------------------------------------------------------------
my @CONNECTIVE_RULES = (
    [ qr/ of /i,  ' ' ],
    [ qr/ and /i, ' ' ],
    [ qr/ for /i, ' ' ],
    [ qr/ für /i, ' ' ],
    [ qr/ de /i,  ' ' ],
    [ qr/ d'/i,   ' ' ],
    [ qr/ do /i,  ' ' ],
    # Second pass: in " of of " the two matches share a space, so a single
    # global pass removes only the first one.
    [ qr/ of /i,  ' ' ],
    [ qr/ zu /i,  ' ' ],
    [ qr/'/,      ''  ],
    [ qr/"/,      ''  ],
    [ qr/\./,     ''  ],
    [ qr/ & /,    ' ' ],
);

# ---------------------------------------------------------------------------
# Abbreviate institute names.
# ---------------------------------------------------------------------------
my @INSTITUTE_RULES = (
    _literal_rule( 'Center',       'Cent'  ),
    _literal_rule( 'Centre',       'Cent'  ),
    _literal_rule( 'Dep ',         'Dept ' ),
    _literal_rule( 'Department ',  'Dept ' ),
    _literal_rule( 'Departments ', 'Dept ' ),
    # No separate "Dept. " rule: every period has already been removed.
    _literal_rule( 'Depts',        'Depts'        ),
    _literal_rule( 'Departament',  'Departament'  ),
    _literal_rule( 'Departamento', 'Departamento' ),
    _literal_rule( 'Hospital',     'Hosp' ),
    _literal_rule( 'Institute',    'Inst' ),
    _stem_rule(    'Labor',        'Lab'  ),
    _literal_rule( 'Research',     'Res'  ),
    _stem_rule(    'Tech',         'Tech' ),
    _literal_rule( 'Acad',         'Acad' ),
);

# ---------------------------------------------------------------------------
# Abbreviate subject names. Stem rules cover a word both in the middle of a
# line and as the last word of a line.
# NOTE(review): a stem word followed directly by a comma is left untouched;
# confirm whether such words should be abbreviated as well.
# ---------------------------------------------------------------------------
my @SUBJECT_RULES = (
    _literal_rule( 'Advanced',         'Adv'           ),
    _stem_rule(    'Africa',           'Afr'           ),
    _stem_rule(    'Agricultur',       'Agricult'      ),
    _stem_rule(    'Anaesthesi',       'Anaesthes'     ),
    _stem_rule(    'Analy',            'Anal'          ),
    _stem_rule(    'Anatom',           'Anatom'        ),
    _stem_rule(    'Anesthesi',        'Anesthes'      ),
    _stem_rule(    'Antibioti',        'Antibiot'      ),
    _literal_rule( 'Applied',          'Appl'          ),
    _stem_rule(    'Architectur',      'Architect'     ),
    _stem_rule(    'Bacteriol',        'Bacteriol'     ),
    _stem_rule(    'Behavi',           'Behav'         ),
    _stem_rule(    'Biochem',          'Biochem'       ),
    _stem_rule(    'Biochim',          'Biochim'       ),
    _stem_rule(    'Bioenerg',         'Bioenerg'      ),
    _stem_rule(    'Bioengineer',      'Bioeng'        ),
    _stem_rule(    'Biogeochemi',      'Biogeochemi'   ),
    _stem_rule(    'Bioinformatic',    'Bioinformatic' ),
    _literal_rule( 'Bioinorganic',     'Bioinorganic'  ),
    # These two contain "Biolog" and must run before the general rule,
    # which would otherwise turn them into "MicroBiol" / "Molecular Biol".
    _stem_rule(    'Microbiolog',      'Microbiol'     ),
    _stem_rule(    'Molecular Biolog', 'Mol Biol'      ),
    _stem_rule(    'Biolog',           'Biol'          ),
    _stem_rule(    'Biomacromol',      'Biomacromol'   ),
    _stem_rule(    'Biomater',         'Biomater'      ),
    _literal_rule( 'Biomedical',       'Biomed'        ),
    _stem_rule(    'Biomembr',         'Biomembr'      ),
    _stem_rule(    'Biophysic',        'Biophysic'     ),
    _stem_rule(    'Biostatist',       'Biostatist'    ),
    _literal_rule( 'Cardiovascular',   'Cardiovasc'    ),
    _literal_rule( 'Chemical',         'Chem'          ),
    _literal_rule( 'Chemistry',        'Chem'          ),
    _stem_rule(    'Comput',           'Comp'          ),
    _stem_rule(    'Economic',         'Economics'     ),
    _stem_rule(    'Endocrinolog',     'Endocrin'      ),
    _literal_rule( 'Engineering',      'Engn'          ),
    _stem_rule(    'Environment',      'Env'           ),
    _stem_rule(    'Epidemiolog',      'Epidemiol'     ),
    _stem_rule(    'Gastroenterol',    'Gastroent'     ),
    _literal_rule( 'General',          'Gen'           ),
    _stem_rule(    'Geolog',           'Geology'       ),
    _literal_rule( 'Intensive Care',   'Intensive Care' ),
    _literal_rule( 'Management',       'Management'    ),
    _stem_rule(    'Mathematic',       'Mathematic'    ),
    _literal_rule( 'Mechanical',       'Mech'          ),
    _literal_rule( 'Medical',          'Med'           ),
    _literal_rule( 'Medicine',         'Med'           ),
    _stem_rule(    'Oncolog',          'Oncolog'       ),
    _literal_rule( 'Physics',          'Phys'          ),
    _literal_rule( 'Physiology',       'Physiol'       ),
    _literal_rule( 'Science',          'Sci'           ),
    _stem_rule(    'Surg',             'Surg'          ),
    _literal_rule( 'Technical',        'Tech'          ),
    _literal_rule( 'Theoretical',      'Theor'         ),
);

# All rules in the order in which they are applied.
my @RULES = (
    @UNIVERSITY_RULES,
    @PHRASE_RULES,
    @CONNECTIVE_RULES,
    @INSTITUTE_RULES,
    @SUBJECT_RULES,
);

# abbreviate_affiliations($text)
#
# Applies every rule in @RULES, in order, to a copy of $text and returns the
# result. $text may hold any number of lines; an undefined value is treated
# as the empty string.
sub abbreviate_affiliations {
    my ($text) = @_;
    $text = '' unless defined $text;

    for my $rule (@RULES) {
        my ($pattern, $replacement) = @{$rule};
        $text =~ s/$pattern/$replacement/g;
    }

    return $text;
}

# main(INPUT_FILE, OUTPUT_FILE)
#
# Reads INPUT_FILE, abbreviates its content and writes it to OUTPUT_FILE.
# Dies with a message containing the system error if a file cannot be
# opened, written or closed. Arguments beyond the first two are ignored.
sub main {
    my @args = @_;
    @args >= 2
        or die "Usage: $0 INPUT_FILE OUTPUT_FILE\n";
    my ($input_path, $output_path) = @args;

    $| = 1;    # unbuffered STDOUT

    # Three-argument open: the paths are never interpreted as open modes
    # or as commands.
    open my $in, '<', $input_path
        or die "Cannot open input file $input_path: $!\n";
    my $content = do { local $/; <$in> };    # slurp the whole file
    close $in;

    open my $out, '>', $output_path
        or die "Cannot open output file $output_path for writing: $!\n";
    print {$out} abbreviate_affiliations($content)
        or die "Cannot write to output file $output_path: $!\n";
    # Buffered write errors only surface when the handle is closed.
    close $out
        or die "Cannot close output file $output_path: $!\n";

    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require/do to call abbreviate_affiliations() directly.
main(@ARGV) unless caller;

1;