#!/usr/bin/perl
# This is a CGI script in Perl.
#
# It creates PDF files of the number googolplex written out in
# book form. Since the number googolplex is too large to fit in
# a single book, it has been divided into many volumes. Each
# PDF file corresponds to one of these volumes.
#
# The number of the requested volume is given to this program
# as a query string, for example if this program is available
# at http://www.GoogolplexWrittenOut.com/cgi-bin/makepdf.pl then one can
# get volume number 12345 as a PDF file
# at http://www.GoogolplexWrittenOut.com/cgi-bin/makepdf.pl?12345
# where "12345" is the query string. Setting the query string
# to "source", this means visiting something
# like http://www.GoogolplexWrittenOut.com/cgi-bin/makepdf.pl?source
# shows the source of this perl-script.
#
# This program returns the created pdf file via http and in
# the http header it mentions the canonical URL of the form
# http://www.GoogolplexWrittenOut.com/volume12345.pdf
# if in this example the query string was 12345.
# This means it assumes that the .htaccess file is used
# for URL rewriting.
#
# Wolfgang Hartmut Nitsche
# http://www.stanford.edu/~nitsche/
# http://nitsche.mobi
# http://www.GoogolplexWrittenOut.com

###########################################################
###########################################################
##                                                       ##
##  I N I T I A L   P A R T   O F   T H I S   S C R I P T  ##
##                                                       ##
###########################################################
###########################################################

# Text which should be included in the book to show the version.
# For example in the form Edition.MajorRevision.MinorRevision
$versiontext = "1.0.1";

# Probably this is not necessary, but just to be on the safe side, we use
# it anyway: length() must give the number of bytes in a string, not the
# number of characters, because the byte offsets of the PDF xref table are
# computed with length(). So a Windows line-break has length 2.
use bytes;

# The query string should usually be the number of the requested volume.
# For example $query = '123' will produce the PDF file of book volume 123.
# Volume numbers must only consist of digits 0-9 and may not have any
# leading zero. $query = 'source' will show the source code of this perl
# script. Any other value gives an error message.
$query = $ENV{'QUERY_STRING'};
$query = '' unless defined $query;    # no query string at all is handled like an empty one

# This is the file-name of this perl script, as reported by the web server.
$filename = $ENV{'SCRIPT_FILENAME'};

# The number of the last volume of the books:
$voltotal = "1" . ("0" x 94);

# Get rid of unwanted special characters in $query:
#  1. decode %XX escapes,
#  2. replace everything outside printable ASCII ("!" to "~") by "?",
#  3. replace the HTML-special characters < > & by "?", so that $query can
#     later be printed into the HTML error page as it is.
$query =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$query =~ s/[^\!-\~]/\?/g;
$query =~ s/[\<\>\&]/\?/g;

# Show the source code of this perl script, if requested:
if ($query eq 'source') {
    my $source_fh;

    # Three-argument open with a lexical handle; the file name comes from the
    # environment and must never be interpreted as an open mode.
    unless (defined $filename and open($source_fh, '<', $filename)) {
        print "Content-type: text/plain\n";
        print "Status: 500 Internal Server Error\n";
        print "\n";
        print "error (500 Internal Server Error): cannot read the source of this script\n";
        exit;
    }

    $sourcefile = '';
    while (defined(my $sourceline = <$source_fh>)) {
        $sourcefile .= $sourceline;
    }
    close($source_fh);

    print "Content-type: text/plain\n\n";
    print $sourcefile;
    exit;
}

# Check if $query is a valid volume number. Both numbers are far too large
# for numeric comparison, so they are compared as digit strings: first by
# length, and for equal length with the string operator "gt".
$errormessage = '';
if ($query eq '') {
    $errormessage = 'requested volume number is an empty string';
}
elsif ($query =~ /[^0-9]/) {
    $errormessage = 'requested volume number contains non-numeric characters';
}
elsif (substr($query, 0, 1) eq '0') {
    $errormessage = 'requested volume number starts with leading zero';
}
elsif (length($query) > length($voltotal)) {
    $errormessage = 'requested volume number is longer and therefore larger than ' . $voltotal;
}
elsif ((length($query) == length($voltotal)) and ($query gt $voltotal)) {
    $errormessage = 'requested volume number is of equal length but larger than ' . $voltotal;
}

# If $query is a valid volume number, use its value;
# otherwise show an error message:
if ($errormessage eq '') {
    $volnow = "$query";
}
else {
    print "Content-type: text/html\n";
    print "Status: 400 Bad Request\n";
    print "\n";
    print '<!DOCTYPE html>' . "\n";
    print "<html>\n";
    print "<head>\n";
    print "<title>error (400 Bad Request)</title>\n";
    print '<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">' . "\n";
    print "</head>\n";
    print "<body>\n";
    print "<h1>error (400 Bad Request)</h1>\n";
    print "<p>The volume which you requested does not exist.</p>\n";
    print "<hr>\n";
    print "<p>Valid volume numbers must be a number from 1 to $voltotal\n";
    print "and must be written without leading zeros\n";
    print "and without any non-digit characters.</p>\n";
    print "<p>\n";
    print "Your requested volume number was<br>\n";
    print "$query<br>\n";    # < > & were already replaced by "?" above
    print "which caused the following problem:<br>\n";
    print "$errormessage\n";
    print "</p>\n";
    print "</body>\n";
    print "</html>\n";
    exit;
}

# If we reach this part of the program, we know
# that $volnow is a valid volume number, and
# $voltotal is the number of the last possible
# volume number, and $versiontext indicates the
# version of the book.
# We want to produce the PDF file for volume $volnow.

#############################################################
#############################################################
##                                                         ##
##  S T A R T   P R O D U C I N G   T H E   P D F   F I L E  ##
##                                                         ##
#############################################################
#############################################################

# Total number of pages in the PDF file which we want to produce:
$pagenumbertotal = 405;

# $nl stands for NewLine and contains the characters which we use
# as line-break within the source-code of the PDF. Intentionally, we
# do not use \n which has a platform dependent length and might therefore
# result in an incorrect total line length in the final xref table.
$nl = "\015\012";

# $fs stands for FileString and should become a string which contains the
# entire PDF file. In the second line of the PDF file, we add a few
# non-ASCII-characters (with values above 128) to show certain data transfer
# programs that the final PDF file should be treated as a binary file.
$fs = '%PDF-1.4';
$fs .= $nl . '%' . "\342\343\317\323" . $nl;

# $ic stands for IndexCounter and is the number
# like "$ic 0 obj" when we used it the last time
$ic = 0;

# $xs stands for XrefString and should become a string which
# contains all the data for the final xref table.
$xs = '0000000000 65535 f';

# %rh is the ReferenceHash
%rh = ();

# $nn always stands for NowName of current reference

# In PDF files you can for example store some data in a block which starts with
#     123 0 obj
# and later you can for example use the data from this specific block by making a reference to
#     123 0 R
#
# So we have to count the data blocks and keep track of their numbers.
# If the last data block which we created started with "123 0 obj" then $ic has a value of 123. # # Often we want to make a reference to a data block which will only # be created later during the run of this perl script, so we do not yet # know which number it will have. So in that case we first use some # placeholder name for example # (*-*(exampledata)*-*) 0 R # instead of # 123 0 R # When we finally create the block "exampledata" and assuming # that this is the 123th block, we first set # $nn="exampledata" # then we increase $ic by one which gives 123 and we start the block with # 123 0 obj # Then we save # $rh{$nn} = $ic; # which means that we add an element to %rh which has # the key 'exampledata' and the value 123. # For this, we always use the sub-program nnsub which is called immediately after # assigning the right data to $nn. # Then, at the end of creating the PDF file we have to replace the temporary # named references by their numbers, for example in the example mentioned # above, we replace all occurences of (*-*(exampledata)*-*) by the number 123. # In this case, we also add a few spaces before 123 so that # the number with the additional spaces has the same length (number of bytes) # as the string (*-*(exampledata)*-*) . This is necessary to make sure that # the byte offset numbers in the final xref table remain valid. # At the end, when we actually want to make the replacements, the # hash %rh gives us the names which we want to replace, as well as the numbers # by which we want to replace them. ##################################################### # # # S T A R T O F M A I N P A R T O F P D F # # # ##################################################### $nn = 'catalog'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Catalog'; $fs .= $nl. '/Pages (*-*(pages)*-*) 0 R'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'info'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<<'; $fs .= $nl. 
'/Title (Googolplex Written Out)'; $fs .= $nl. '/Author (Wolfgang H. Nitsche)'; $fs .= $nl. '/Creator (http://www.GoogolplexWrittenOut.com)'; $fs .= $nl. '/Producer (http://www.GoogolplexWrittenOut.com)'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'courier'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Font'; $fs .= $nl. '/Subtype /Type1'; $fs .= $nl. '/BaseFont /Courier'; $fs .= $nl. '/Encoding /WinAnsiEncoding'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'courierbold'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Font'; $fs .= $nl. '/Subtype /Type1'; $fs .= $nl. '/BaseFont /Courier-Bold'; $fs .= $nl. '/Encoding /WinAnsiEncoding'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'timesroman'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Font'; $fs .= $nl. '/Subtype /Type1'; $fs .= $nl. '/BaseFont /Times-Roman'; $fs .= $nl. '/Encoding /WinAnsiEncoding'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'timesbold'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Font'; $fs .= $nl. '/Subtype /Type1'; $fs .= $nl. '/BaseFont /Times-Bold'; $fs .= $nl. '/Encoding /WinAnsiEncoding'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'weburi'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /S /URI'; $fs .= $nl. '/URI (http://www.GoogolplexWrittenOut.com)'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'weblink2'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Annot'; $fs .= $nl. '/Subtype /Link'; $fs .= $nl. '/Rect [110 250 400 265]'; $fs .= $nl. '/Border [0 0 0]'; $fs .= $nl. '/A (*-*(weburi)*-*) 0 R'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'weblink4'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Annot'; $fs .= $nl. '/Subtype /Link'; $fs .= $nl. '/Rect [150 205 450 230]'; $fs .= $nl. 
'/Border [0 0 0]'; $fs .= $nl. '/A (*-*(weburi)*-*) 0 R'; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; $nn = 'lowerlabel'; # # # # # # # # # # # # # # # # # # # # &nnsub; # $stream = ''; $stream .= 'BT'; $stream .= $nl. '/F2 6.4 Tf'; $stream .= $nl. '103 125 Td'; $stream .= $nl. '(Googolplex Written Out) Tj'; $stream .= $nl. '0 -6.5 Td'; $stream .= $nl. '(volume ' . $volnow . ') Tj'; $stream .= $nl. '0 -6.5 Td'; $stream .= $nl. '(of ' . $voltotal . ') Tj'; $stream .= $nl. 'ET'; #-- &streamsub; $nn = 'spiral'; # # # # # # # # # # # # # # # # # # # # &nnsub; # # The spiral is used for writing the volume number on the # title pages. If the volume number is too long to write it # in a single line (with the used font-size), then it is # written in the form of a spiral. # Settings for spiral: $spiralcharacterwidth = 15; $spirallinespacing = 40; $spiralx0 = 200; $spiraly0 = 410; $spiralradius0incharacters = 9; $spiral2pi = 2 * 3.1415926536; # make calculations for spiral: $spiraltext = 'volume ' . $volnow; $spiralradius0 = $spiralradius0incharacters * $spiralcharacterwidth; $spiralxcenter = $spiralx0 + $spiralradius0; $spiralycenter = $spiraly0 - $spiralradius0; $stream = ''; $stream .= 'BT' . $nl . '/F2 24 Tf'; $spiralangle = 0; $spiralcos = 1; $spiralsin = 0; $spiraly = $spiraly0; for($spiralletternumber=0; $spiralletternumber>'; $fs .= $nl. '>>'; $fs .= $nl. '/Kids ['; for($pagenumbernow=1; $pagenumbernow<=$pagenumbertotal; $pagenumbernow++) { $fs .= $nl. '(*-*(page' . $pagenumbernow . ')*-*) 0 R'; } $fs .= $nl. ']'; $fs .= $nl. '/Count ' . $pagenumbertotal; $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; # create the data blocks which correspond to the individual pages: # # # # # # # # # # # # # # # # # # # # for($pagenumbernow=1; $pagenumbernow<=$pagenumbertotal; $pagenumbernow++) { $nn = 'page'.$pagenumbernow; # # # # # # # # # # # # # # # # # # # # &nnsub; # $fs .= $nl. '<< /Type /Page'; $fs .= $nl. '/Parent (*-*(pages)*-*) 0 R'; $fs .= $nl. 
'/Contents ['; $fs .= $nl. '(*-*(box)*-*) 0 R'; if($pagenumbernow == 1) { $fs .= $nl. '(*-*(outsidetitle)*-*) 0 R'; $fs .= $nl. '(*-*(spiral)*-*) 0 R'; } if($pagenumbernow == 2) { $fs .= $nl. '(*-*(biblioinfo)*-*) 0 R'; } if($pagenumbernow == 3) { $fs .= $nl. '(*-*(insidetitle)*-*) 0 R'; $fs .= $nl. '(*-*(spiral)*-*) 0 R'; } if($pagenumbernow == 4) { $fs .= $nl. '(*-*(introduction)*-*) 0 R'; } if($pagenumbernow == 5) { if("$volnow" eq "1") { $fs .= $nl. '(*-*(topfirst)*-*) 0 R'; } else { $fs .= $nl. '(*-*(topcontinue)*-*) 0 R'; } } if( ($pagenumbernow >= 6) and ($pagenumbernow <= 404) ) { $fs .= $nl. '(*-*(topzeros)*-*) 0 R'; } if( ($pagenumbernow >= 5) and ($pagenumbernow <= 404) ) { $fs .= $nl. '(*-*(manyzeros)*-*) 0 R'; } if($pagenumbernow == 404) { if("$volnow" eq "$voltotal") { $fs .= $nl. '(*-*(bottomlast)*-*) 0 R'; } else { $fs .= $nl. '(*-*(bottomcontinue)*-*) 0 R'; } } if($pagenumbernow == 405) { $fs .= $nl. '(*-*(leftblank)*-*) 0 R'; } $fs .= $nl. '(*-*(lowerlabel)*-*) 0 R'; $fs .= $nl. '(*-*(pagelabel' . $pagenumbernow . ')*-*) 0 R'; $fs .= $nl. ']'; if($pagenumbernow == 2) { $fs .= $nl. '/Annots [ (*-*(weblink2)*-*) 0 R ]'; } if($pagenumbernow == 4) { $fs .= $nl. '/Annots [ (*-*(weblink4)*-*) 0 R ]'; } $fs .= $nl. '>>'; $fs .= $nl. 'endobj'; $fs .= $nl; } ################################################### # # # S T A R T B O T T O M O F P D F F I L E # # # ################################################### # add the bottom stuff (like xref table and trailer) to the PDF file: $fs .= $nl; $startxref = length($fs); $fs .= 'xref'; ## $fs .= $nl. "0 " . (1 + $ic); $fs .= $nl. $xs; $fs .= $nl; $fs .= $nl. 'trailer'; $fs .= $nl. '<<'; $fs .= $nl. '/Size ' . (1 + $ic); $fs .= $nl. '/Root (*-*(catalog)*-*) 0 R'; $fs .= $nl. '/Info (*-*(info)*-*) 0 R'; $fs .= $nl. '>>'; $fs .= $nl. 'startxref'; $fs .= $nl. $startxref; $fs .= $nl. 
'%%EOF';

# Replace the placeholder words like (*-*(exampledata)*-*) by the respective
# object numbers stored in %rh. The number is right-aligned and padded with
# spaces to exactly the byte length of the placeholder, so that the byte
# offsets which were already written into the xref table remain valid.
# All placeholders are resolved in one pass over $fs; a placeholder whose
# name is not a key of %rh is left untouched.
$fs =~ s{ ( \(\*-\*\( ([^()]*) \)\*-\*\) ) }
        { exists $rh{$2} ? sprintf('%*s', length($1), $rh{$2}) : $1 }gex;

#############################################################
#############################################################
##                                                         ##
##  N O W   T H E   P D F   F I L E   I S   P R O D U C E D  ##
##  A N D   W E   W A N T   T O   S H O W   I T .            ##
##                                                         ##
#############################################################
#############################################################

# The PDF is binary data: make sure that no output layer rewrites the
# line-breaks, which would invalidate the byte offsets in the xref table.
binmode(STDOUT);

print "Content-type: application/pdf\n";
# Canonical URL of this volume, of the form
# http://www.GoogolplexWrittenOut.com/volume12345.pdf
print 'Link: <http://www.GoogolplexWrittenOut.com/volume' . $volnow . '.pdf>; rel="canonical"' . "\n";
print "\n";
print $fs;
exit;

################################################
################################################
##                                            ##
##  E N D   O F   M A I N   P R O G R A M     ##
##  S T A R T   O F   S U B - P R O G R A M S ##
##                                            ##
################################################
################################################

# This is the sub-program which we always call immediately
# after assigning a new value to $nn. It will then add the
# corresponding data of the form (after final replacement)
#     123 0 obj
# to the file string $fs,
# and also the line of the form
#     0000032323 00000 n
# to $xs for the xref table.
#
# Reads and writes the globals $fs, $xs, $ic, %rh, $nn and reads $nl;
# it takes no arguments. On return $nn is reset to the empty string.
sub nnsub {
    $fs .= $nl;

    # The new object starts at the current end of $fs; this byte offset is
    # what the xref table needs (10 digits, zero-padded).
    $xs .= $nl . sprintf('%010d', length($fs)) . ' 00000 n';

    $ic++;
    $fs .= $ic . ' 0 obj';

    # A PDF comment which shows the name of the block; the placeholder in it
    # is later replaced by the object number like every other placeholder.
    $fs .= $nl . '% ' . $nn . ' = ' . "(*-*($nn)*-*)" . ' = ' . $ic;
    $fs .= $nl . '% --------------------';

    # Remember which object number belongs to this name.
    $rh{$nn} = $ic;
    $nn = '';
}

# This is the sub-program which we call whenever $stream
# contains the data which we want to add now
# to the file string $fs
# as a stream.
# Usually the $stream which we give to this sub-program
# will neither start nor end with a line-break, and this
# sub-program will add all required additional linebreaks.
sub streamsub {
    # Appends the current content of the global $stream to the global file
    # string $fs as the body of a PDF stream object (dictionary with /Length,
    # "stream" ... "endstream", then "endobj"), using $nl as line-break.
    # The stream data gets one trailing line-break, which is counted in
    # /Length. Afterwards $stream is reset to the empty string.
    # Takes no arguments.
    my $streamdata = $stream . $nl;

    $fs .= join('',
        $nl, '<< /Length ', length($streamdata), ' >>',
        $nl, 'stream',
        $nl, $streamdata,
        'endstream',
        $nl, 'endobj',
        $nl,
    );

    $stream = '';
}

#################################
#################################
##                             ##
##  E N D   O F   P R O G R A M  ##
##                             ##
#################################
#################################