Talk:Windows code page

From Wikipedia, the free encyclopedia

The generator code is given below. readtxt.pas can be obtained from http://bewareserv.sourceforge.net/ and most of the data used came from http://www.unicode.org/PUBLIC/mappings ; the .mspx.html files, which were used where unicode.org didn't have a mapping available for the code page in question, are from http://www.microsoft.com/globaldev/reference/oem.mspx

//Generates output.txt, an HTML table with one row per byte value and one
//column per code page, from the mapping files loaded by processcharset.
program byposition;

uses
  sysutils,readtxt; //we use our own text reader as the delphi one can't handle
                    //unix format text
const
  //highest column (code page) index; every loop over columns runs 0..maxcharset
  //(the value 9 matches the commented-out windows-xxx list in the main program)
  maxcharset =12;//9;
  //first byte value that gets a row in the table; rows run from startat to 255
  startat = $80;
//showallchars: when defined every byte value gets a row; when undefined only
//rows where at least one column differs from column 0 are emitted
{$define showallchars}
//breakbeforecodepoint: when defined the U+xxxx text is written as
//<br><small>..</small>, otherwise as <sub>..</sub>; it also halves the number
//of rows between repeated header rows
{$define breakbeforecodepoint}
//NOTE: the main program also tests a "twocol" define which is not defined here
var
  //buildarray[byte value, column] = unicode code point, or -1 when the column
  //has no mapping for that byte (the main program fills it with -1 first)
  buildarray : array[0..255,0..maxcharset] of longint;
  //names[column] = text placed inside [[ ]] in the header row for that column
  names : array[0..31] of string;
//Loads one code page mapping file into column `number` of buildarray and
//records `name` as the heading of that column.
//  name     - text stored in names[number]; the main program writes it as [[name]]
//  number   - column index, must be in 0..maxcharset (otherwise the call is
//             reported and ignored instead of writing outside buildarray)
//  filename - mapping file to read
//Two line layouts are recognised, every other line is ignored:
//  unicode.org format : characters 3-4 are the byte in hex, characters 8-11
//                       the code point in hex, with 'x' at positions 2 and 7
//                       (i.e. 0xNN?0xNNNN)
//  ms format          : NN = U+NNNN
//Lines that match a layout but do not contain valid hex digits are skipped
//rather than raising a conversion error.
//NOTE(review): the reader opened by readtext_init is never released here; the
//readtxt unit is not visible from this file, so check whether it needs a
//matching close call.
procedure processcharset(name:string;number:byte;filename:string);
var
  t: treadtext;
  line:string;
  position,codepoint:longint;
begin
  if number > maxcharset then begin
    writeln('processcharset: column ',number,' (',filename,') is outside 0..',maxcharset,', ignored');
    exit;
  end;
  names[number] := name;

  readtext_init(t,filename);
  repeat
    line := readtext_line(t);
    position := -1;
    codepoint := -1;
    if (length(line)>=11) and (line[2]='x') and (line[7]='x') then begin
      //unicode.org format
      position := strtointdef('$'+copy(line,3,2),-1);
      codepoint := strtointdef('$'+copy(line,8,4),-1);
    end else if (length(line)>=11) and (copy(line,3,5)=' = U+') then begin
      //ms format
      position := strtointdef('$'+copy(line,1,2),-1);
      codepoint := strtointdef('$'+copy(line,8,4),-1);
    end;
    //only store when both fields parsed; -1 means "not a mapping line"
    if (position>=0) and (position<=255) and (codepoint>=0) then begin
      buildarray[position,number] := codepoint;
    end;
  until readtext_eof(t);
end;
//Main program: loads every code page into buildarray, then writes output.txt
//containing one table row per byte value from startat to 255 and one cell per
//code page. Each cell holds a wiki link for the character (or the name of the
//control code) followed by its U+xxxx code point.
var
  t: textfile;
  i,j,k : integer;
  goodline : boolean;
  rowcounter : integer;
  comparevalue : integer;
begin
  //-1 marks "no mapping"; cells still holding -1 are written empty
  for i := 0 to 255 do for j := 0 to maxcharset do buildarray[i,j] := -1;

  {processcharset('windows-874|874',0,'CP874.txt');
  processcharset('windows-1250|1250',1,'CP1250.txt');
  processcharset('windows-1251|1251',2,'CP1251.txt');
  processcharset('windows-1252|1252',3,'CP1252.txt');
  processcharset('windows-1253|1253',4,'CP1253.txt');
  processcharset('windows-1254|1254',5,'CP1254.txt');
  processcharset('windows-1255|1255',6,'CP1255.txt');
  processcharset('windows-1256|1256',7,'CP1256.txt');
  processcharset('windows-1257|1257',8,'CP1257.txt');
  processcharset('windows-1258|1258',9,'CP1258.txt');}

  processcharset('code page 437|437',0,'CP437.txt');
  processcharset('code page 720|720',1,'720.mspx.html');
  processcharset('code page 737|737',2,'CP737.txt');
  processcharset('code page 775|775',3,'CP775.txt');
  processcharset('code page 850|850',4,'CP850.txt');
  processcharset('code page 852|852',5,'CP852.txt');
  processcharset('code page 855|855',6,'CP855.txt');
  processcharset('code page 857|857',7,'CP857.txt');
  processcharset('code page 858|858',8,'858.mspx.html');
  processcharset('code page 862|862',9,'CP862.txt');
  processcharset('code page 866|866',10,'CP866.txt');
  processcharset('windows-874|874',11,'CP874.txt');
  //this column is labelled windows-1258, so it has to read the 1258 mapping
  //(it previously read CP1250.txt and showed 1250 data under a 1258 heading)
  processcharset('windows-1258|1258',12,'CP1258.txt');

  assignfile(t,'output.txt');
  rewrite(t);
  writeln(t,'<table {{prettytable}}>');


  rowcounter := 0;
  for i := startat to 255 do begin
    goodline := false;
    {$ifdef showallchars}
      goodline := true;
    {$else}
      //only keep rows where some column differs from column 0
      comparevalue := buildarray[i,0];
      for j := 1 to maxcharset do begin
        if comparevalue <> buildarray[i,j] then goodline := true;
      end;
    {$endif}
    if goodline then begin
      //repeat the header row whenever the low bits of rowcounter are all zero;
      //the mask is $1F shifted right once for single column layout and once
      //more when the code point goes on its own line
      if (rowcounter and ($1F shr(0{$ifndef twocol}+1{$endif} {$ifdef breakbeforecodepoint}+1{$endif} ))) = 0 then begin
        write(t,'<tr>');
        {$ifdef twocol}for j := 1 to 2 do{$endif} begin

          write(t,'<td>position<br>([[hexadecimal|hex]])');
          for k := 0 to maxcharset do begin
            write(t,'<td>[['+names[k]+']]');
          end;
        end;
      end;
      {$ifdef twocol}if (rowcounter and 1) =0 then{$endif} write(t,'<tr>');
      write(t,'<td>'+inttohex(i,2));
      inc(rowcounter);

      for j := 0 to maxcharset do begin
        write(t,'<td>');
        //control codes and the two invisible latin-1 characters are linked by
        //name; everything else is linked through a numeric character reference
        case buildarray[i,j] of
          -1  : ;
          $00 : write(t,'[[NUL]]');
          $01 : write(t,'[[SOH]]');
          $02 : write(t,'[[STX]]');
          $03 : write(t,'[[ETX]]');
          $04 : write(t,'[[EOT]]');
          $05 : write(t,'[[ENQ]]');
          $06 : write(t,'[[ACK]]');
          $07 : write(t,'[[BEL]]');
          $08 : write(t,'[[BS]]');
          $09 : write(t,'[[TAB]]');
          $0A : write(t,'[[LF]]');
          $0B : write(t,'[[VT]]');
          $0C : write(t,'[[FF]]');
          $0D : write(t,'[[CR]]');
          $0E : write(t,'[[SO]]');
          $0F : write(t,'[[SI]]');

          $10 : write(t,'[[DLE]]');
          $11 : write(t,'[[DC1]]');
          $12 : write(t,'[[DC2]]');
          $13 : write(t,'[[DC3]]');
          $14 : write(t,'[[DC4]]');
          $15 : write(t,'[[NAK]]');
          $16 : write(t,'[[SYN]]');
          $17 : write(t,'[[ETB]]');
          $18 : write(t,'[[CAN]]');
          $19 : write(t,'[[EM]]');
          $1A : write(t,'[[SUB]]');
          $1B : write(t,'[[ESC]]');
          $1C : write(t,'[[FS]]');
          $1D : write(t,'[[GS]]');
          $1E : write(t,'[[RS]]');
          $1F : write(t,'[[US]]');

          $80 : write(t,'[[PAD]]');
          $81 : write(t,'[[HOP]]');
          $82 : write(t,'[[BPH]]');
          $83 : write(t,'[[NBH]]');
          $84 : write(t,'[[IND]]');
          $85 : write(t,'[[NEL]]');
          $86 : write(t,'[[SSA]]');
          $87 : write(t,'[[ESA]]');
          $88 : write(t,'[[HTS]]');
          $89 : write(t,'[[HTJ]]');
          $8A : write(t,'[[VTS]]');
          $8B : write(t,'[[PLD]]');
          $8C : write(t,'[[PLU]]');
          $8D : write(t,'[[RI]]');
          $8E : write(t,'[[SS2]]');
          $8F : write(t,'[[SS3]]');

          $90 : write(t,'[[DCS]]');
          $91 : write(t,'[[PU1]]');
          $92 : write(t,'[[PU2]]');
          $93 : write(t,'[[STS]]');
          $94 : write(t,'[[CCH]]');
          $95 : write(t,'[[MW]]');
          $96 : write(t,'[[SPA]]');
          $97 : write(t,'[[EPA]]');
          $98 : write(t,'[[SOS]]');
          $99 : write(t,'[[SGCI]]');
          $9A : write(t,'[[SCI]]');
          $9B : write(t,'[[CSI]]');
          $9C : write(t,'[[ST]]');
          $9D : write(t,'[[OSC]]');
          $9E : write(t,'[[PM]]');
          $9F : write(t,'[[APC]]');

          $A0 : write(t,'[[NBSP]]');
          $AD : write(t,'[[SHY]]');


          else write(t,'[[&#x'+inttohex(buildarray[i,j],4)+';]]');
        end;
        //append the code point itself for every mapped cell
        {$ifdef breakbeforecodepoint}
          if buildarray[i,j] >=0 then write(t,'<br><small>U+'+inttohex(buildarray[i,j],4)+'</small>');
        {$else}
          if buildarray[i,j] >=0 then write(t,'<sub>U+'+inttohex(buildarray[i,j],4)+'</sub>');
        {$endif}
      end;
      writeln(t,'</td>');


    end;
  end;
  writeln(t,'</table>');
  closefile(t);

end.

[edit] ANSI or not?

Once and for all, is it correct to call the Windows code pages "ANSI"? Currently, some pages on Wikipedia say it's wrong (as ANSI never defined these code pages, but Microsoft calls them "ANSI" anyway), while this article gives the impression that it is OK. --Abdull 23:53, 17 March 2006 (UTC)

Well, Microsoft's technical documents use that term all over the place and I don't believe anyone uses the term "ANSI code page" for anything else. I can't imagine ANSI are particularly happy about having their name put to something that isn't theirs, though. I guess it all depends on how you define right and wrong ;) Plugwash 10:44, 18 March 2006 (UTC)