#!/usr/bin/perl -w
# html_to_text
# VERSION: 2 (13 September 2004)
# PURPOSE: Convert HTML files to text files
#          Very simple minded and not very effective
# INPUT FILES:  Any file in HTML format
# OUTPUT FILES: Same file in text format

############## LIBRARIES AND PRAGMAS ################

use strict;
use warnings;

#################### VARIABLES ######################

# The whole document held as one string. Read_html_file() produces it and
# every later step edits it in place.
my $text;

###################### FILES ########################

# Three-argument open with lexical handles: the mode can never be taken
# from the file name, and the handles are ordinary scoped variables.
my $html_file = 'C:\temp\Program5\Coordinate system transformation.html';
open my $html_fh, '<', $html_file
    or die "Can't open $html_file: $!\n";

my $text_file = 'C:\temp\Program5\Coordinate system transformation.txt';
open my $text_fh, '>', $text_file
    or die "Can't open $text_file: $!\n";

################### MAIN PROGRAM ####################

$text = Read_html_file();
Convert_formatting();
Remove_remaining_tags();
Write_text_file();

close $html_fh or die "Can't close $html_file: $!\n";

# Buffered write errors only surface at close, so this check matters.
close $text_fh or die "Can't close $text_file: $!\n";

#################### SUBROUTINES ####################

#### READ_HTML_FILE
# Concatenates the input file into a single string.
# Takes no arguments; reads from the already opened input handle.
# Returns the file contents with every line ending replaced by one space
# (an empty string for an empty file).
sub Read_html_file {
    my @lines;
    while ( my $line = <$html_fh> ) {

        # Strip LF or CRLF so a stray "\r" never ends up in the output.
        $line =~ s/\r?\n\z//;
        push @lines, $line;
    }

    # Join with a space rather than nothing: a line break in HTML source
    # is whitespace, so gluing lines together would merge adjacent words.
    return join ' ', @lines;
}

#### CONVERT_FORMATTING
# Converts <p> into three newlines (two blank lines)
# Converts <br> into two newlines (one blank line)
# One could do more, but that's it for now
#
# NOTE(review): the tag names in the two patterns below were missing from
# the copy of this script under review and have been restored as the
# paragraph and line-break tags -- confirm against the original.
# Matching is case-insensitive and tolerates attributes and the
# self-closing form, e.g. <P align="left"> or <br />. The \b keeps <p...>
# from matching other tags that start with "p", such as <pre>.
sub Convert_formatting {
    $text =~ s/<p\b[^>]*>/\n\n\n/gi;
    $text =~ s/<br\b[^>]*>/\n\n/gi;
    return;
}

#### REMOVE_REMAINING_TAGS
# Delete <...>
# A negated character class is used instead of ".+": a greedy ".+" runs
# from the first "<" to the LAST ">" on a line and would delete all of
# the text between two tags along with the tags themselves.
sub Remove_remaining_tags {
    $text =~ s/<[^>]+>//g;
    return;
}

#### WRITE_TEXT_FILE
# Write what remains to the text file.
# TODO: wrapping to a maximum line length is not implemented; the text is
# written exactly as the previous steps left it.
sub Write_text_file {
    print {$text_fh} $text
        or die "Can't write to $text_file: $!\n";
    return;
}