# CBS_split - version 16.06.2017 kl. 12.00
# Udtrækker datafelter fra CBS-poster HTML-kildekode
use strict;
use warnings;
# --- Pass 1: whitespace normalisation into a scratch file -------------------
# Command-line arguments:
#   $ARGV[0]  path of the input file, opened for reading as INPUT
#   $ARGV[1]  path of the output file, opened for writing as OUTPUT
# The script dies with a message if either file cannot be opened.
my $file = $ARGV[0];
open(INPUT, $file) or die("Input file $file not found.\n");
my $output = $ARGV[1];
# OUTPUT is created/truncated here but is not written to in this pass.
open(OUTPUT, '>'.$output) or die "Can't create $output.\n";
# Scratch file in the current working directory; "+>" opens it read/write and
# truncates any existing content.
open TEMP, "+>temp2.txt" or die $!;
# NOTE(review): the read operand after "=" is missing in this copy of the
# file - presumably it was <INPUT> and was lost together with the HTML tags
# further down. Confirm against the original script.
while(my $line = ) {
$line =~ s|\n| |ig; # replace line breaks with spaces throughout the input file
$line =~ s|\t| |ig; # replace TABs with spaces throughout the input file
# NOTE(review): the substitutions below appear to have lost the markup that
# was part of their patterns (only the capture groups are left). As written,
# (.*?) can match the empty string, and the last substitution refers to $2
# and $3 although its pattern has a single capture group. Restore the
# patterns from the original script before relying on this code.
$line =~ s|/ / (.*?)|$1|ig;
$line =~ s|(.*?)|$1|ig;
$line =~ s|(.*?)|$1|ig;
$line =~ s|(.*?)|$1|ig;
$line =~ s|(.*?)|$1|ig;
$line =~ s|(.*?)|$1|ig;
$line =~ s|(.*?)|$3 [$2]|ig;
# Each processed line is appended to the scratch file; because the newline was
# replaced by a space above, the lines are written without line breaks.
print TEMP $line; }
# Progress message on STDOUT.
# NOTE(review): "& converted to &" looks like it originally named an HTML
# entity on the left-hand side - confirm the intended wording.
print "Empty lines, TABs a.o. removed, & converted to &\n\n";
close(TEMP);
open(TEMP2, ") {
# Delete headers and footers
$line2 =~ s|||ig;
$line2 =~ s|.*?