contrib/unicode2nginx/unicode-to-nginx.pl - nginx source code

  1. #!/usr/bin/perl -w

  2. # Convert unicode mappings to nginx configuration file format.

  3. # You may find useful mappings in various places, including
  4. # unicode.org official site:
  5. #
  6. # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
  7. # http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT

  8. # Needs perl 5.6 or later.

  9. # Written by Maxim Dounin, mdounin@mdounin.ru

  10. ###############################################################################

  require 5.006;

  use strict;

  # Return the UTF-8 encoding of one Unicode code point as upper-case hex.
  #
  # Argument: the code point as a string of hex digits without the "0x"
  # prefix (e.g. "0402").
  # Returns: the bytes of the UTF-8 sequence, each formatted as two
  # upper-case hex digits and concatenated (e.g. "D082").
  sub _utf8_hex {
      my ($code_hex) = @_;

      # pack("U") builds the character from its code; unpacking with
      # "U0C*" then yields the individual bytes of its UTF-8 sequence.
      return join('',
          map { sprintf("%02X", $_) }
          unpack("U0C*", pack("U", hex($code_hex)))
      );
  }

  # Convert a single mapping line to an nginx configuration line.
  #
  # Argument: one input line, without its line terminator, of the form
  #     0x<from-code> 0x<unicode-code> # <unicode-name>
  # Returns: the formatted output line (newline included), or nothing
  # when the line is not a well-formed mapping.
  sub _convert_line {
      my ($line) = @_;

      # Only real hex digits are accepted for both codes, so malformed
      # input is rejected here instead of being fed to hex().
      my ($cs_code, $un_code, $un_name) =
          $line =~ /^\s*0x([0-9A-Fa-f]{2})\s*0x([0-9A-Fa-f]{4})\s*(#.*)/
          or return;

      return "    $cs_code  " . _utf8_hex($un_code) . " ; $un_name\n";
  }

  while (my $line = <>) {
      # Strip the line terminator, LF or CRLF, so that a stray CR never
      # ends up in the generated output.
      $line =~ s/[\r\n]+\z//;

      # Skip comments (optionally indented) and empty lines
      next if $line =~ /^\s*#/;
      next if $line =~ /^\s*$/;

      # Convert mappings; anything else is reported and left out
      my $converted = _convert_line($line);

      if (defined $converted) {
          print $converted;

      } else {
          warn "Unrecognized line: '$line'";
      }
  }

  33. ###############################################################################