diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b7f6df --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*~ +\#*\# \ No newline at end of file diff --git a/doc/Makefile b/doc/Makefile index c6edf2b..54e28a0 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -40,6 +40,6 @@ ifdef DOC_DIR rm $(DOC_DIR)/utt.pdf rm $(DOC_DIR)/utt.ps endif - + clean: - rm utt.info utt.dvi utt.html utt.pdf utt.ps || true + rm -f *.info *.dvi *.html *.pdf *.ps *.log *.aux diff --git a/doc/dgp.tex b/doc/dgp.tex new file mode 100644 index 0000000..9713305 --- /dev/null +++ b/doc/dgp.tex @@ -0,0 +1,209 @@ +\documentclass[a4paper]{report} + +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} + +\title{DGP} +\author{Tomasz Obrębski} + + +\begin{document} +\maketitle + +\chapter{Introduction} + +\chapter{Grammar} + +\chapter{Parsing algorithm} + +\chapter{Input} + +Wejście dla parsera przygotowuje się w następujący sposób: +\begin{verbatim} +cat text.txt | tok | sen | lem | canonize | gph | dgp ... +\end{verbatim} + +Plik wejściowy + +dgp bierze na wejściu graf słów (wordgraph). Numery wierzchołków tego +grafu to wartości pola gph. Pole to jest wprowadzane do pliku przez +program gph. + +Poza polem gph, dgp odczytuje też wartość pola lem. + +\chapter{Output} + +Format: + +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:4:1,2,3 dgp:6;s + +\begin{verbatim} + +dgp:<node>;<saturation>[;<links>][;<sets>][;<constraints>] + +\end{verbatim} + +\begin{description} +\item[{\it node}] Dependency graph node number. +\item[saturation] The information whether the node is saturated. A + node is saturated if the list of required connections for this node + is empty; it is unsaturated otherwise. +\item[links] The comma-separated list of connections. For each node + either the list of its dependents or the list of its heads may be + printed, or both (this depends on the value of the \verb|--info| + parameter). 
+\item[sets] For each node, the sets of all its left neighbours, + transitive left heads, transitive left dependents, and nodes visible + on the left can be printed. (This information is useful for fast + tree generation.) +\item[constraints] The information on constraints imposed on the + node. Constraints follow from the SGL and REQ grammar rules and have + the form of a comma-separated list of dependency types required by + the node and forbidden for the node. The elements of the list have + the following format: + + \begin{tabular}{ll} + + \verb|!|{\it dependency type} & {\it dependency type} is required\\ + \verb|&|{\it dependency type} & {\it dependency type} is forbidden + + \end{tabular} + +\end{description} + + +Wynikiem pracy dgp jest graf zależności. Graf ten może zawierać +(zwykle tak jest) więcej wierzchołków niż graf wejściowy. + +* numer wierzchołka w wyjściowym grafie zależności + +Numery wierzchołków w wyjściowym grafie są inne. Podczas działania +parser tworzy kopie (klony) wierzchołków wejściowych. Dzieje się tak w +sytuacji, kiedy do wierzchołka (jako nadrzędnika) dowiązywana jest +zależność objęta ograniczeniami. Ograniczenia wynikają z reguł +gramatyki SGL i OBL. + +SGL - zależność jednokrotna +OBL - zależność obligatoryjna + +node saturation \verb|s| or \verb|u| + + s - wierzchołek nasycony + u - wierzchołek nienasycony + + Wierzchołek nienasycony to taki, któremu brakuje obowiązkowego podrzędnika. + Obowiązkowe podrzędniki określane są w regułach OBL gramatyki. + +connections + * connection list + connections: lista zależności zawiera oddzielony przecinkami ciąg wyrażeń + + ---/ + + jeśli w wywołaniu programu dla parametru --info podano wśród wartości 'd' + (od dependents) + + lub + + ++-/ + + jeśli w wywołaniu programu dla parametru --info podano wśród wartości 'h' + + + Może też zawierać oba typy wyrażeń, jeśli podano zarówno 'd' jak i 'h'. 
+ + Wyrażenie + + ---/ + + oznacza możliwość istnienia zależności typu , której nadrzędnikiem jest aktualny wierzchołek, a podrzędnikiem + wierzchołek (o za chwilę). + + +pies goni czarnego kota w butach. + +\begin{figure} +\begin{verbatim} +0000 00 BOS * +0000 04 W Pies lem:pies,N/CnGaNs +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs +0019 04 W kota lem:kot,N/CaGaNs +0019 04 W kota lem:kot,N/CgGaNs +0023 01 S _ +0024 01 W w lem:w,P/Cal +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp +0026 06 W butach lem:but,N/ClGiNp +0032 01 P . +0033 01 S \n +0034 00 EOS * +\end{verbatim} +\caption{output of \verb@tok | sen | lem | canonize@} +\end{figure} + + +\begin{figure} +\scriptsize +\begin{verbatim} +0000 00 BOS * gph:0: +0000 04 W Pies lem:pies,N/CnGaNs gph:1:0 +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs gph:3:2 +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs gph:4:2 +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs gph:5:3,4 +0019 04 W kota lem:kot,N/CaGaNs gph:6:3,4 +0019 04 W kota lem:kot,N/CgGaNs gph:7:3,4 +0023 01 S _ +0024 01 W w lem:w,P/Cal gph:8:5,6,7 +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp gph:9:8 +0026 06 W butach lem:but,N/ClGiNp gph:10:8 +0032 01 P . 
+0033 01 S \n +0034 00 EOS * gph:11:9,10 +\end{verbatim} +\caption{Word graph representation: sentence annotated with gph.} +\end{figure} + + +\begin{figure} + \scriptsize +\begin{verbatim} +0000 00 BOS * gph:0: dgp:0;s;; +0000 04 W Pies lem:pies,N/CnGaNs gph:1:0 dgp:1;s;; +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:2;s;; +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:3;s;--subj-1/2;!subj +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:8;s;--cmpl_ga-7/3,--cmpl_ga-10/3,--prep-11/8;!subj!cmpl_ga +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:9;s;--cmpl_ga-7/2,--cmpl_ga-10/2,--prep-11/9;!cmpl_ga +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs gph:3:2 dgp:4;s;; +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs gph:4:2 dgp:5;s;; +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs gph:5:3,4 dgp:6;s;--prep-11/6; +0019 04 W kota lem:kot,N/CaGaNs gph:6:3,4 dgp:7;s;--mod-4/7,--prep-11/7; +0019 04 W kota lem:kot,N/CgGaNs gph:7:3,4 dgp:10;s;--mod-5/10,--prep-11/10; +0023 01 S _ +0024 01 W w lem:w,P/Cal gph:8:5,6,7 dgp:11;u;;&pcmpl +0024 01 W w lem:w,P/Cal gph:8:5,6,7 dgp:13;s;--pcmpl-12/11,--pcmpl-14/11;!pcmpl +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp gph:9:8 dgp:12;s;; +0026 06 W butach lem:but,N/ClGiNp gph:10:8 dgp:14;s;; +0032 01 P . +0033 01 S \n +0034 00 EOS * gph:11:9,10 dgp:15;s;; +\end{verbatim} +\caption{dgp output} +\end{figure} + + +\end{document} diff --git a/doc/dgp/WARNINGS b/doc/dgp/WARNINGS new file mode 100644 index 0000000..d354810 --- /dev/null +++ b/doc/dgp/WARNINGS @@ -0,0 +1,14 @@ +No implementation found for style `fontenc' + +? 
brace missing for \contentsline +couldn't convert character `tilde into available encodings + + ...set $ACCENT_IMAGES to get an image + +No number for "outputof" + +Failed to convert image /tmp/l2h18811/image003.ps + +Failed to convert image /tmp/l2h18811/image002.ps + +Failed to convert image /tmp/l2h18811/image001.ps diff --git a/doc/dgp/dgp.css b/doc/dgp/dgp.css new file mode 100644 index 0000000..320271f --- /dev/null +++ b/doc/dgp/dgp.css @@ -0,0 +1,35 @@ +/* Century Schoolbook font is very similar to Computer Modern Math: cmmi */ +.MATH { font-family: "Century Schoolbook", serif; } +.MATH I { font-family: "Century Schoolbook", serif; font-style: italic } +.BOLDMATH { font-family: "Century Schoolbook", serif; font-weight: bold } + +/* implement both fixed-size and relative sizes */ +SMALL.XTINY { font-size : xx-small } +SMALL.TINY { font-size : x-small } +SMALL.SCRIPTSIZE { font-size : smaller } +SMALL.FOOTNOTESIZE { font-size : small } +SMALL.SMALL { } +BIG.LARGE { } +BIG.XLARGE { font-size : large } +BIG.XXLARGE { font-size : x-large } +BIG.HUGE { font-size : larger } +BIG.XHUGE { font-size : xx-large } + +/* heading styles */ +H1 { } +H2 { } +H3 { } +H4 { } +H5 { } + +/* mathematics styles */ +DIV.displaymath { } /* math displays */ +TD.eqno { } /* equation-number cells */ + + +/* document-specific styles come next */ +DIV.navigation { } +SPAN.normalfont { } +PRE.preform { } +SPAN.it { } +SPAN.arabic { } diff --git a/doc/dgp/dgp.html b/doc/dgp/dgp.html new file mode 100644 index 0000000..a949906 --- /dev/null +++ b/doc/dgp/dgp.html @@ -0,0 +1,72 @@ + + + + + +DGP + + + + + + + + + + + + + + + + + + +

DGP

+
+ +

Tomasz Obrębski

+
+

+ + + + + +

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/images.aux b/doc/dgp/images.aux new file mode 100644 index 0000000..f23e546 --- /dev/null +++ b/doc/dgp/images.aux @@ -0,0 +1 @@ +\relax diff --git a/doc/dgp/images.log b/doc/dgp/images.log new file mode 100644 index 0000000..12671e5 --- /dev/null +++ b/doc/dgp/images.log @@ -0,0 +1,357 @@ +This is pdfTeX, Version 3.1415926-2.4-1.40.13 (TeX Live 2012/Debian) (format=latex 2014.12.5) 19 DEC 2014 18:49 +entering extended mode + restricted \write18 enabled. + %&-line parsing enabled. +**./images.tex +(./images.tex +LaTeX2e <2011/06/27> +Babel and hyphenation patterns for english, dumylang, nohyphenation, po +lish, loaded. + +(/usr/share/texlive/texmf-dist/tex/latex/base/report.cls +Document Class: report 2007/10/19 v1.4h Standard LaTeX document class +(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo +File: size10.clo 2007/10/19 v1.4h Standard LaTeX file (size option) +) +\c@part=\count79 +\c@chapter=\count80 +\c@section=\count81 +\c@subsection=\count82 +\c@subsubsection=\count83 +\c@paragraph=\count84 +\c@subparagraph=\count85 +\c@figure=\count86 +\c@table=\count87 +\abovecaptionskip=\skip41 +\belowcaptionskip=\skip42 +\bibindent=\dimen102 +) (/usr/share/texlive/texmf-dist/tex/latex/base/ifthen.sty +Package: ifthen 2001/05/26 v1.1c Standard LaTeX ifthen package (DPC) +) (/usr/share/texlive/texmf-dist/tex/latex/base/fontenc.sty +Package: fontenc 2005/09/27 v1.99g Standard LaTeX package +(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.def +File: t1enc.def 2005/09/27 v1.99g Standard LaTeX file +LaTeX Font Info: Redeclaring font encoding T1 on input line 43. +)) (/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty +Package: inputenc 2008/03/30 v1.1d Input encoding file +\inpenc@prehook=\toks14 +\inpenc@posthook=\toks15 +(/usr/share/texlive/texmf-dist/tex/latex/base/utf8.def +File: utf8.def 2008/04/05 v1.1m UTF-8 support for inputenc +Now handling font encoding OML ... +... 
no UTF-8 mapping file for font encoding OML +Now handling font encoding T1 ... +... processing UTF-8 mapping file for font encoding T1 +(/usr/share/texlive/texmf-dist/tex/latex/base/t1enc.dfu +File: t1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A1 (decimal 161) + defining Unicode char U+00A3 (decimal 163) + defining Unicode char U+00AB (decimal 171) + defining Unicode char U+00BB (decimal 187) + defining Unicode char U+00BF (decimal 191) + defining Unicode char U+00C0 (decimal 192) + defining Unicode char U+00C1 (decimal 193) + defining Unicode char U+00C2 (decimal 194) + defining Unicode char U+00C3 (decimal 195) + defining Unicode char U+00C4 (decimal 196) + defining Unicode char U+00C5 (decimal 197) + defining Unicode char U+00C6 (decimal 198) + defining Unicode char U+00C7 (decimal 199) + defining Unicode char U+00C8 (decimal 200) + defining Unicode char U+00C9 (decimal 201) + defining Unicode char U+00CA (decimal 202) + defining Unicode char U+00CB (decimal 203) + defining Unicode char U+00CC (decimal 204) + defining Unicode char U+00CD (decimal 205) + defining Unicode char U+00CE (decimal 206) + defining Unicode char U+00CF (decimal 207) + defining Unicode char U+00D0 (decimal 208) + defining Unicode char U+00D1 (decimal 209) + defining Unicode char U+00D2 (decimal 210) + defining Unicode char U+00D3 (decimal 211) + defining Unicode char U+00D4 (decimal 212) + defining Unicode char U+00D5 (decimal 213) + defining Unicode char U+00D6 (decimal 214) + defining Unicode char U+00D8 (decimal 216) + defining Unicode char U+00D9 (decimal 217) + defining Unicode char U+00DA (decimal 218) + defining Unicode char U+00DB (decimal 219) + defining Unicode char U+00DC (decimal 220) + defining Unicode char U+00DD (decimal 221) + defining Unicode char U+00DE (decimal 222) + defining Unicode char U+00DF (decimal 223) + defining Unicode char U+00E0 (decimal 224) + defining Unicode char U+00E1 (decimal 225) + defining Unicode char U+00E2 
(decimal 226) + defining Unicode char U+00E3 (decimal 227) + defining Unicode char U+00E4 (decimal 228) + defining Unicode char U+00E5 (decimal 229) + defining Unicode char U+00E6 (decimal 230) + defining Unicode char U+00E7 (decimal 231) + defining Unicode char U+00E8 (decimal 232) + defining Unicode char U+00E9 (decimal 233) + defining Unicode char U+00EA (decimal 234) + defining Unicode char U+00EB (decimal 235) + defining Unicode char U+00EC (decimal 236) + defining Unicode char U+00ED (decimal 237) + defining Unicode char U+00EE (decimal 238) + defining Unicode char U+00EF (decimal 239) + defining Unicode char U+00F0 (decimal 240) + defining Unicode char U+00F1 (decimal 241) + defining Unicode char U+00F2 (decimal 242) + defining Unicode char U+00F3 (decimal 243) + defining Unicode char U+00F4 (decimal 244) + defining Unicode char U+00F5 (decimal 245) + defining Unicode char U+00F6 (decimal 246) + defining Unicode char U+00F8 (decimal 248) + defining Unicode char U+00F9 (decimal 249) + defining Unicode char U+00FA (decimal 250) + defining Unicode char U+00FB (decimal 251) + defining Unicode char U+00FC (decimal 252) + defining Unicode char U+00FD (decimal 253) + defining Unicode char U+00FE (decimal 254) + defining Unicode char U+00FF (decimal 255) + defining Unicode char U+0102 (decimal 258) + defining Unicode char U+0103 (decimal 259) + defining Unicode char U+0104 (decimal 260) + defining Unicode char U+0105 (decimal 261) + defining Unicode char U+0106 (decimal 262) + defining Unicode char U+0107 (decimal 263) + defining Unicode char U+010C (decimal 268) + defining Unicode char U+010D (decimal 269) + defining Unicode char U+010E (decimal 270) + defining Unicode char U+010F (decimal 271) + defining Unicode char U+0110 (decimal 272) + defining Unicode char U+0111 (decimal 273) + defining Unicode char U+0118 (decimal 280) + defining Unicode char U+0119 (decimal 281) + defining Unicode char U+011A (decimal 282) + defining Unicode char U+011B (decimal 283) + 
defining Unicode char U+011E (decimal 286) + defining Unicode char U+011F (decimal 287) + defining Unicode char U+0130 (decimal 304) + defining Unicode char U+0131 (decimal 305) + defining Unicode char U+0132 (decimal 306) + defining Unicode char U+0133 (decimal 307) + defining Unicode char U+0139 (decimal 313) + defining Unicode char U+013A (decimal 314) + defining Unicode char U+013D (decimal 317) + defining Unicode char U+013E (decimal 318) + defining Unicode char U+0141 (decimal 321) + defining Unicode char U+0142 (decimal 322) + defining Unicode char U+0143 (decimal 323) + defining Unicode char U+0144 (decimal 324) + defining Unicode char U+0147 (decimal 327) + defining Unicode char U+0148 (decimal 328) + defining Unicode char U+014A (decimal 330) + defining Unicode char U+014B (decimal 331) + defining Unicode char U+0150 (decimal 336) + defining Unicode char U+0151 (decimal 337) + defining Unicode char U+0152 (decimal 338) + defining Unicode char U+0153 (decimal 339) + defining Unicode char U+0154 (decimal 340) + defining Unicode char U+0155 (decimal 341) + defining Unicode char U+0158 (decimal 344) + defining Unicode char U+0159 (decimal 345) + defining Unicode char U+015A (decimal 346) + defining Unicode char U+015B (decimal 347) + defining Unicode char U+015E (decimal 350) + defining Unicode char U+015F (decimal 351) + defining Unicode char U+0160 (decimal 352) + defining Unicode char U+0161 (decimal 353) + defining Unicode char U+0162 (decimal 354) + defining Unicode char U+0163 (decimal 355) + defining Unicode char U+0164 (decimal 356) + defining Unicode char U+0165 (decimal 357) + defining Unicode char U+016E (decimal 366) + defining Unicode char U+016F (decimal 367) + defining Unicode char U+0170 (decimal 368) + defining Unicode char U+0171 (decimal 369) + defining Unicode char U+0178 (decimal 376) + defining Unicode char U+0179 (decimal 377) + defining Unicode char U+017A (decimal 378) + defining Unicode char U+017B (decimal 379) + defining Unicode 
char U+017C (decimal 380) + defining Unicode char U+017D (decimal 381) + defining Unicode char U+017E (decimal 382) + defining Unicode char U+200C (decimal 8204) + defining Unicode char U+2013 (decimal 8211) + defining Unicode char U+2014 (decimal 8212) + defining Unicode char U+2018 (decimal 8216) + defining Unicode char U+2019 (decimal 8217) + defining Unicode char U+201A (decimal 8218) + defining Unicode char U+201C (decimal 8220) + defining Unicode char U+201D (decimal 8221) + defining Unicode char U+201E (decimal 8222) + defining Unicode char U+2030 (decimal 8240) + defining Unicode char U+2031 (decimal 8241) + defining Unicode char U+2039 (decimal 8249) + defining Unicode char U+203A (decimal 8250) + defining Unicode char U+2423 (decimal 9251) +) +Now handling font encoding OT1 ... +... processing UTF-8 mapping file for font encoding OT1 +(/usr/share/texlive/texmf-dist/tex/latex/base/ot1enc.dfu +File: ot1enc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A1 (decimal 161) + defining Unicode char U+00A3 (decimal 163) + defining Unicode char U+00B8 (decimal 184) + defining Unicode char U+00BF (decimal 191) + defining Unicode char U+00C5 (decimal 197) + defining Unicode char U+00C6 (decimal 198) + defining Unicode char U+00D8 (decimal 216) + defining Unicode char U+00DF (decimal 223) + defining Unicode char U+00E6 (decimal 230) + defining Unicode char U+00EC (decimal 236) + defining Unicode char U+00ED (decimal 237) + defining Unicode char U+00EE (decimal 238) + defining Unicode char U+00EF (decimal 239) + defining Unicode char U+00F8 (decimal 248) + defining Unicode char U+0131 (decimal 305) + defining Unicode char U+0141 (decimal 321) + defining Unicode char U+0142 (decimal 322) + defining Unicode char U+0152 (decimal 338) + defining Unicode char U+0153 (decimal 339) + defining Unicode char U+2013 (decimal 8211) + defining Unicode char U+2014 (decimal 8212) + defining Unicode char U+2018 (decimal 8216) + defining Unicode char U+2019 
(decimal 8217) + defining Unicode char U+201C (decimal 8220) + defining Unicode char U+201D (decimal 8221) +) +Now handling font encoding OMS ... +... processing UTF-8 mapping file for font encoding OMS +(/usr/share/texlive/texmf-dist/tex/latex/base/omsenc.dfu +File: omsenc.dfu 2008/04/05 v1.1m UTF-8 support for inputenc + defining Unicode char U+00A7 (decimal 167) + defining Unicode char U+00B6 (decimal 182) + defining Unicode char U+00B7 (decimal 183) + defining Unicode char U+2020 (decimal 8224) + defining Unicode char U+2021 (decimal 8225) + defining Unicode char U+2022 (decimal 8226) +) +Now handling font encoding OMX ... +... no UTF-8 mapping file for font encoding OMX +Now handling font encoding U ... +... no UTF-8 mapping file for font encoding U + defining Unicode char U+00A9 (decimal 169) + defining Unicode char U+00AA (decimal 170) + defining Unicode char U+00AE (decimal 174) + defining Unicode char U+00BA (decimal 186) + defining Unicode char U+02C6 (decimal 710) + defining Unicode char U+02DC (decimal 732) + defining Unicode char U+200C (decimal 8204) + defining Unicode char U+2026 (decimal 8230) + defining Unicode char U+2122 (decimal 8482) + defining Unicode char U+2423 (decimal 9251) +)) (/usr/share/texlive/texmf-dist/tex/latex/graphics/color.sty +Package: color 2005/11/14 v1.0j Standard LaTeX Color (DPC) +(/usr/share/texlive/texmf-dist/tex/latex/latexconfig/color.cfg +File: color.cfg 2007/01/18 v1.5 color configuration of teTeX/TeXLive +) +Package color Info: Driver file: dvips.def on input line 130. +(/usr/share/texlive/texmf-dist/tex/latex/graphics/dvips.def +File: dvips.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR) +) (/usr/share/texlive/texmf-dist/tex/latex/graphics/dvipsnam.def +File: dvipsnam.def 1999/02/16 v3.0i Driver-dependant file (DPC,SPQR) +)) + +! LaTeX Error: Option clash for package inputenc. + +See the LaTeX manual or LaTeX Companion for explanation. +Type H for immediate help. + ... 
+ +l.24 + +The package inputenc has already been loaded with options: + [utf8] +There has now been an attempt to load it with options + [latin1] +Adding the global options: + utf8,latin1 +to your \documentclass declaration may fix this. +Try typing to proceed. + +\sizebox=\box26 +\lthtmlwrite=\write3 +(./images.aux) +\openout1 = `images.aux'. + +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 123. +LaTeX Font Info: ... okay on input line 123. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 123. +LaTeX Font Info: ... okay on input line 123. +LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 123. +LaTeX Font Info: ... okay on input line 123. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 123. +LaTeX Font Info: ... okay on input line 123. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 123. +LaTeX Font Info: ... okay on input line 123. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 123. +LaTeX Font Info: ... okay on input line 123. + +latex2htmlLength hsize=349.0pt + +latex2htmlLength vsize=682.0pt + +latex2htmlLength hoffset=0.0pt + +latex2htmlLength voffset=0.0pt + +latex2htmlLength topmargin=0.0pt + +latex2htmlLength topskip=0.00003pt + +latex2htmlLength headheight=0.0pt + +latex2htmlLength headsep=0.0pt + +latex2htmlLength parskip=0.0pt plus 1.0pt + +latex2htmlLength oddsidemargin=53.0pt + +latex2htmlLength evensidemargin=53.0pt + +LaTeX Font Info: Try loading font information for T1+cmtt on input line 153. + +(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmtt.fd +File: t1cmtt.fd 1999/05/25 v2.5h Standard LaTeX font definitions +) +l2hSize :figure26:238.0pt::0.0pt::349.0pt. +[1 + + + +] +l2hSize :figure31:163.55518pt::0.0pt::349.0pt. 
+[2 + + +] +Overfull \hbox (63.67963pt too wide) in paragraph at lines 235--235 +[]\T1/cmtt/m/n/7 0005 04 W goni lem:goni¢,V/AiMdNsP3TfrVp gph:2:1 dgp:8;s;--cmp +l_ga-7/3,--cmpl_ga-10/3,--prep-11/8;!subj!cmpl_ga[] + [] + + +Overfull \hbox (45.09045pt too wide) in paragraph at lines 235--235 +[]\T1/cmtt/m/n/7 0005 04 W goni lem:goni¢,V/AiMdNsP3TfrVp gph:2:1 dgp:9;s;--cmp +l_ga-7/2,--cmpl_ga-10/2,--prep-11/9;!cmpl_ga[] + [] + +l2hSize :figure36:195.55518pt::0.0pt::349.0pt. +[3 + + +] (./images.aux) ) +Here is how much of TeX's memory you used: + 1035 strings out of 495025 + 11803 string characters out of 3181177 + 58180 words of memory out of 3000000 + 4265 multiletter control sequences out of 15000+200000 + 5797 words of font info for 18 fonts, out of 3000000 for 9000 + 34 hyphenation exceptions out of 8191 + 23i,5n,19p,4058b,297s stack positions out of 5000i,500n,10000p,200000b,50000s + +Output written on images.dvi (3 pages, 3420 bytes). diff --git a/doc/dgp/images.pl b/doc/dgp/images.pl new file mode 100644 index 0000000..e08fb49 --- /dev/null +++ b/doc/dgp/images.pl @@ -0,0 +1,6 @@ +# LaTeX2HTML 2008 (1.71) +# Associate images original text with physical files. 
+ + +1; + diff --git a/doc/dgp/images.tex b/doc/dgp/images.tex new file mode 100644 index 0000000..3a9facc --- /dev/null +++ b/doc/dgp/images.tex @@ -0,0 +1,242 @@ +\batchmode +\documentclass[a4paper]{report} +\RequirePackage{ifthen} + + + + +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} + + +\title{DGP} +\author{Tomasz ObrÄ™bski} + + + + +\usepackage[dvips]{color} + + +\pagecolor[gray]{.7} + +\usepackage[latin1]{inputenc} + + + +\makeatletter + +\makeatletter +\count@=\the\catcode`\_ \catcode`\_=8 +\newenvironment{tex2html_wrap}{}{}% +\catcode`\<=12\catcode`\_=\count@ +\newcommand{\providedcommand}[1]{\expandafter\providecommand\csname #1\endcsname}% +\newcommand{\renewedcommand}[1]{\expandafter\providecommand\csname #1\endcsname{}% + \expandafter\renewcommand\csname #1\endcsname}% +\newcommand{\newedenvironment}[1]{\newenvironment{#1}{}{}\renewenvironment{#1}}% +\let\newedcommand\renewedcommand +\let\renewedenvironment\newedenvironment +\makeatother +\let\mathon=$ +\let\mathoff=$ +\ifx\AtBeginDocument\undefined \newcommand{\AtBeginDocument}[1]{}\fi +\newbox\sizebox +\setlength{\hoffset}{0pt}\setlength{\voffset}{0pt} +\addtolength{\textheight}{\footskip}\setlength{\footskip}{0pt} +\addtolength{\textheight}{\topmargin}\setlength{\topmargin}{0pt} +\addtolength{\textheight}{\headheight}\setlength{\headheight}{0pt} +\addtolength{\textheight}{\headsep}\setlength{\headsep}{0pt} +\setlength{\textwidth}{349pt} +\newwrite\lthtmlwrite +\makeatletter +\let\realnormalsize=\normalsize +\global\topskip=2sp +\def\preveqno{}\let\real@float=\@float \let\realend@float=\end@float +\def\@float{\let\@savefreelist\@freelist\real@float} +\def\liih@math{\ifmmode$\else\bad@math\fi} +\def\end@float{\realend@float\global\let\@freelist\@savefreelist} +\let\real@dbflt=\@dbflt \let\end@dblfloat=\end@float +\let\@largefloatcheck=\relax +\let\if@boxedmulticols=\iftrue +\def\@dbflt{\let\@savefreelist\@freelist\real@dbflt} +\def\adjustnormalsize{\def\normalsize{\mathsurround=0pt 
\realnormalsize + \parindent=0pt\abovedisplayskip=0pt\belowdisplayskip=0pt}% + \def\phantompar{\csname par\endcsname}\normalsize}% +\def\lthtmltypeout#1{{\let\protect\string \immediate\write\lthtmlwrite{#1}}}% +\newcommand\lthtmlhboxmathA{\adjustnormalsize\setbox\sizebox=\hbox\bgroup\kern.05em }% +\newcommand\lthtmlhboxmathB{\adjustnormalsize\setbox\sizebox=\hbox to\hsize\bgroup\hfill }% +\newcommand\lthtmlvboxmathA{\adjustnormalsize\setbox\sizebox=\vbox\bgroup % + \let\ifinner=\iffalse \let\)\liih@math }% +\newcommand\lthtmlboxmathZ{\@next\next\@currlist{}{\def\next{\voidb@x}}% + \expandafter\box\next\egroup}% +\newcommand\lthtmlmathtype[1]{\gdef\lthtmlmathenv{#1}}% +\newcommand\lthtmllogmath{\dimen0\ht\sizebox \advance\dimen0\dp\sizebox + \ifdim\dimen0>.95\vsize + \lthtmltypeout{% +*** image for \lthtmlmathenv\space is too tall at \the\dimen0, reducing to .95 vsize ***}% + \ht\sizebox.95\vsize \dp\sizebox\z@ \fi + \lthtmltypeout{l2hSize % +:\lthtmlmathenv:\the\ht\sizebox::\the\dp\sizebox::\the\wd\sizebox.\preveqno}}% +\newcommand\lthtmlfigureA[1]{\let\@savefreelist\@freelist + \lthtmlmathtype{#1}\lthtmlvboxmathA}% +\newcommand\lthtmlpictureA{\bgroup\catcode`\_=8 \lthtmlpictureB}% +\newcommand\lthtmlpictureB[1]{\lthtmlmathtype{#1}\egroup + \let\@savefreelist\@freelist \lthtmlhboxmathB}% +\newcommand\lthtmlpictureZ[1]{\hfill\lthtmlfigureZ}% +\newcommand\lthtmlfigureZ{\lthtmlboxmathZ\lthtmllogmath\copy\sizebox + \global\let\@freelist\@savefreelist}% +\newcommand\lthtmldisplayA{\bgroup\catcode`\_=8 \lthtmldisplayAi}% +\newcommand\lthtmldisplayAi[1]{\lthtmlmathtype{#1}\egroup\lthtmlvboxmathA}% +\newcommand\lthtmldisplayB[1]{\edef\preveqno{(\theequation)}% + \lthtmldisplayA{#1}\let\@eqnnum\relax}% +\newcommand\lthtmldisplayZ{\lthtmlboxmathZ\lthtmllogmath\lthtmlsetmath}% +\newcommand\lthtmlinlinemathA{\bgroup\catcode`\_=8 \lthtmlinlinemathB} +\newcommand\lthtmlinlinemathB[1]{\lthtmlmathtype{#1}\egroup\lthtmlhboxmathA + \vrule height1.5ex width0pt }% 
+\newcommand\lthtmlinlineA{\bgroup\catcode`\_=8 \lthtmlinlineB}% +\newcommand\lthtmlinlineB[1]{\lthtmlmathtype{#1}\egroup\lthtmlhboxmathA}% +\newcommand\lthtmlinlineZ{\egroup\expandafter\ifdim\dp\sizebox>0pt % + \expandafter\centerinlinemath\fi\lthtmllogmath\lthtmlsetinline} +\newcommand\lthtmlinlinemathZ{\egroup\expandafter\ifdim\dp\sizebox>0pt % + \expandafter\centerinlinemath\fi\lthtmllogmath\lthtmlsetmath} +\newcommand\lthtmlindisplaymathZ{\egroup % + \centerinlinemath\lthtmllogmath\lthtmlsetmath} +\def\lthtmlsetinline{\hbox{\vrule width.1em \vtop{\vbox{% + \kern.1em\copy\sizebox}\ifdim\dp\sizebox>0pt\kern.1em\else\kern.3pt\fi + \ifdim\hsize>\wd\sizebox \hrule depth1pt\fi}}} +\def\lthtmlsetmath{\hbox{\vrule width.1em\kern-.05em\vtop{\vbox{% + \kern.1em\kern0.8 pt\hbox{\hglue.17em\copy\sizebox\hglue0.8 pt}}\kern.3pt% + \ifdim\dp\sizebox>0pt\kern.1em\fi \kern0.8 pt% + \ifdim\hsize>\wd\sizebox \hrule depth1pt\fi}}} +\def\centerinlinemath{% + \dimen1=\ifdim\ht\sizebox<\dp\sizebox \dp\sizebox\else\ht\sizebox\fi + \advance\dimen1by.5pt \vrule width0pt height\dimen1 depth\dimen1 + \dp\sizebox=\dimen1\ht\sizebox=\dimen1\relax} + +\def\lthtmlcheckvsize{\ifdim\ht\sizebox<\vsize + \ifdim\wd\sizebox<\hsize\expandafter\hfill\fi \expandafter\vfill + \else\expandafter\vss\fi}% +\providecommand{\selectlanguage}[1]{}% +\makeatletter \tracingstats = 1 + + +\begin{document} +\pagestyle{empty}\thispagestyle{empty}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength hsize=\the\hsize}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength vsize=\the\vsize}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength hoffset=\the\hoffset}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength voffset=\the\voffset}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength topmargin=\the\topmargin}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength topskip=\the\topskip}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength headheight=\the\headheight}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength 
headsep=\the\headsep}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength parskip=\the\parskip}\lthtmltypeout{}% +\lthtmltypeout{latex2htmlLength oddsidemargin=\the\oddsidemargin}\lthtmltypeout{}% +\makeatletter +\if@twoside\lthtmltypeout{latex2htmlLength evensidemargin=\the\evensidemargin}% +\else\lthtmltypeout{latex2htmlLength evensidemargin=\the\oddsidemargin}\fi% +\lthtmltypeout{}% +\makeatother +\setcounter{page}{1} +\onecolumn + +% !!! IMAGES START HERE !!! + +\bgroup \egroup +\stepcounter{chapter} +\stepcounter{chapter} +\stepcounter{chapter} +\stepcounter{chapter} +\stepcounter{chapter} +{\newpage\clearpage +\lthtmlfigureA{figure26}% +\begin{figure}\begin{verbatim} + +0000 00 BOS * +0000 04 W Pies lem:pies,N/CnGaNs +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs +0019 04 W kota lem:kot,N/CaGaNs +0019 04 W kota lem:kot,N/CgGaNs +0023 01 S _ +0024 01 W w lem:w,P/Cal +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp +0026 06 W butach lem:but,N/ClGiNp +0032 01 P . +0033 01 S \n +0034 00 EOS *\end{verbatim} + +\end{figure}% +\lthtmlfigureZ +\lthtmlcheckvsize\clearpage} + +{\newpage\clearpage +\lthtmlfigureA{figure31}% +\begin{figure}\scriptsize +\begin{verbatim} + +0000 00 BOS * gph:0: +0000 04 W Pies lem:pies,N/CnGaNs gph:1:0 +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs gph:3:2 +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs gph:4:2 +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs gph:5:3,4 +0019 04 W kota lem:kot,N/CaGaNs gph:6:3,4 +0019 04 W kota lem:kot,N/CgGaNs gph:7:3,4 +0023 01 S _ +0024 01 W w lem:w,P/Cal gph:8:5,6,7 +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp gph:9:8 +0026 06 W butach lem:but,N/ClGiNp gph:10:8 +0032 01 P . 
+0033 01 S \n +0034 00 EOS * gph:11:9,10\end{verbatim} + +\end{figure}% +\lthtmlfigureZ +\lthtmlcheckvsize\clearpage} + +{\newpage\clearpage +\lthtmlfigureA{figure36}% +\begin{figure} \scriptsize +\begin{verbatim} + +0000 00 BOS * gph:0: dgp:0;s;; +0000 04 W Pies lem:pies,N/CnGaNs gph:1:0 dgp:1;s;; +0004 01 S _ +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:2;s;; +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:3;s;--subj-1/2;!subj +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:8;s;--cmpl_ga-7/3,--cmpl_ga-10/3,--prep-11/8;!subj!cmpl_ga +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:2:1 dgp:9;s;--cmpl_ga-7/2,--cmpl_ga-10/2,--prep-11/9;!cmpl_ga +0009 01 S _ +0010 08 W czarnego lem:czarny,ADJ/CaDpGapNs gph:3:2 dgp:4;s;; +0010 08 W czarnego lem:czarny,ADJ/CgDpGainpNs gph:4:2 dgp:5;s;; +0018 01 S _ +0019 04 W kota lem:kota,N/CnGfNs gph:5:3,4 dgp:6;s;--prep-11/6; +0019 04 W kota lem:kot,N/CaGaNs gph:6:3,4 dgp:7;s;--mod-4/7,--prep-11/7; +0019 04 W kota lem:kot,N/CgGaNs gph:7:3,4 dgp:10;s;--mod-5/10,--prep-11/10; +0023 01 S _ +0024 01 W w lem:w,P/Cal gph:8:5,6,7 dgp:11;u;;&pcmpl +0024 01 W w lem:w,P/Cal gph:8:5,6,7 dgp:13;s;--pcmpl-12/11,--pcmpl-14/11;!pcmpl +0025 01 S _ +0026 06 W butach lem:buta,N/ClGfNp gph:9:8 dgp:12;s;; +0026 06 W butach lem:but,N/ClGiNp gph:10:8 dgp:14;s;; +0032 01 P . +0033 01 S \n +0034 00 EOS * gph:11:9,10 dgp:15;s;;\end{verbatim} + +\end{figure}% +\lthtmlfigureZ +\lthtmlcheckvsize\clearpage} + + +\end{document} diff --git a/doc/dgp/index.html b/doc/dgp/index.html new file mode 100644 index 0000000..a949906 --- /dev/null +++ b/doc/dgp/index.html @@ -0,0 +1,72 @@ + + + + + +DGP + + + + + + + + + + + + + + + + + + +

DGP

+
+ +

Tomasz Obrębski

+
+

+ + + + + +

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/labels.pl b/doc/dgp/labels.pl new file mode 100644 index 0000000..e5b868d --- /dev/null +++ b/doc/dgp/labels.pl @@ -0,0 +1,13 @@ +# LaTeX2HTML 2008 (1.71) +# Associate labels original text with physical files. + + +1; + + +# LaTeX2HTML 2008 (1.71) +# labels from external_latex_labels array. + + +1; + diff --git a/doc/dgp/missfont.log b/doc/dgp/missfont.log new file mode 100644 index 0000000..a45f2b1 --- /dev/null +++ b/doc/dgp/missfont.log @@ -0,0 +1 @@ +mktexpk --mfmode ljfour --bdpi 8000 --mag 0+7000/(2*4000) --dpi 7000 ectt0800 diff --git a/doc/dgp/node1.html b/doc/dgp/node1.html new file mode 100644 index 0000000..043bc82 --- /dev/null +++ b/doc/dgp/node1.html @@ -0,0 +1,63 @@ + + + + + +Introduction + + + + + + + + + + + + + + + + + + + + + + +

+Introduction +

+

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/node2.html b/doc/dgp/node2.html new file mode 100644 index 0000000..34c44d1 --- /dev/null +++ b/doc/dgp/node2.html @@ -0,0 +1,63 @@ + + + + + +Grammar + + + + + + + + + + + + + + + + + + + + + + +

+Grammar +

+

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/node3.html b/doc/dgp/node3.html new file mode 100644 index 0000000..8d51872 --- /dev/null +++ b/doc/dgp/node3.html @@ -0,0 +1,63 @@ + + + + + +Parsing algorithm + + + + + + + + + + + + + + + + + + + + + + +

+Parsing algorithm +

+

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/node4.html b/doc/dgp/node4.html new file mode 100644 index 0000000..bfcfb55 --- /dev/null +++ b/doc/dgp/node4.html @@ -0,0 +1,76 @@ + + + + + +Input + + + + + + + + + + + + + + + + + + + + + + +

+Input +

+ +Wejście dla parsera przygotowuje się w następujący sposób: +
+cat text.txt | tok | sen | lem | canonize | gph | dgp ...
+
+ +Plik wejściowy + +dgp bierze na wejściu graf słów (wordgraph). Numery wierzchołków tego +grafu to wartości pola gph. Pole to jest wprowadzane do pliku przez +program gph. + +Poza polem gph, dgp odczytuje też wartość pola lem. +

+
+to +2014-12-19 +
+ + diff --git a/doc/dgp/node5.html b/doc/dgp/node5.html new file mode 100644 index 0000000..e27e5b9 --- /dev/null +++ b/doc/dgp/node5.html @@ -0,0 +1,204 @@ + + + + + +Output + + + + + + + + + + + + + + + + + + + + + + +

+Output +

+ +Format: + +0005 04 W goni lem:gonić,V/AiMdNsP3TfrVp gph:4:1,2,3 dgp:6;s + +
+dgp:<node>;<saturation>[;<links>][;<sets>][;<constraints>]
+
+ +
+
node
+
Dependency graph node number. +
+
saturation
+
The information whether the node is saturated. A + node is saturated if the list of required connections for this node + is empty, it is unsaturated otherwise. +
+
links
+
The comma separated list of connections. For each node + either the list of its dependents or the list of its heads may be + printed, or both (this dependes on the value of the --info + parameter). +
+
sets
+
For each node, the sets of all its left neighbours, + transitive left heads, transitive left dependents, and nodes visible + on the left can be printed. (This information is useful for fast + tree generation.) +
+
constraints
+
the information on constraints imposed on the + node. Constraints follow from the SGL and REQ grammar rules and have + the form of a comma-separated list of dependency types required by + the node and forbidden for the node. The elements of the list have + the following format: + + + + + + + + +
!dependency typedependency type is required
&dependency typedependency type is forbidden
+
+
+ +Wynikiem pracy dgp jest graf zależności. Graf ten może zawierać +(zwykle tak jest) więcej wierzchołków niż graf wejściowy. + +* numer wierzchołka w wyjściowym grafie zależności + +Numery wierzchołków w wyjściowym grafie są inne. Podczas działania +parser tworzy kopie (klony) wierzchołków wejściowych. Dzieje się tak w +sytuacji, kiedy do wierzchołka (jako nadrzędnika) dowiazywana jest +zależnośc objęta ograniczeniami. Ograniczenia wynikają z reguł +gramatyki SGL i OBL. + +SGL - zależność jednokrotna +OBL - zależność obligatiryjna + +node saturation s or u + +s - wierzchołek nasycony + u - wierzchołek nienasycony + +Wierzchołek nienasycony to taki, któremu brakuje obowiązkowy podrzędnik. + Obowiązkowe podrzędniki określane są w regułach OBL gramatyki. + +connections + * connection list + connections are lista zależności zawiera oddzielony przecinkami ciąg wyrażeń + +-<typ>-<w1>/<w2> + +jeśli w wywołaniu programu dla parametru -info podano wśród wartości 'd' + (od dependents) + +lub + +++<typ>-<w1>/<w2> + +jeśli w wywołaniu programu dla parametru -info podano wśród wartości 'h' + +Może też zawierac oba typy wyrażeń, jeśli podano zarówno 'd' jak i 'h'. + +Wyrażenie + +-<typ>-<w1>/<w2> + +oznacza możliwość istnienia zależności typu <typ>, której nadrzędnikiem jest aktualny wierzchołek, a podrzędnikiem + wierzchołek <w1> (o <w2> za chwilę). + +pies goni czarnego kota w butach. + +
+ + + +
Figure: +output of tok | sen | lem | canonize
+
+ +
+ + + +
Figure 5.2: +Word graph representation: sentence annotated with gph.
+
+ +
+ + + +
Figure 5.3: +dgp output
+
+ + + +
+to +2014-12-19 +
+ + diff --git a/doc/dgp/node6.html b/doc/dgp/node6.html new file mode 100644 index 0000000..99a8d5e --- /dev/null +++ b/doc/dgp/node6.html @@ -0,0 +1,74 @@ + + + + + +About this document ... + + + + + + + + + + + + + + + + + + + + +

+About this document ... +

+ DGP

+This document was generated using the +LaTeX2HTML translator Version 2008 (1.71) +

+Copyright © 1993, 1994, 1995, 1996, +Nikos Drakos, +Computer Based Learning Unit, University of Leeds. +
+Copyright © 1997, 1998, 1999, +Ross Moore, +Mathematics Department, Macquarie University, Sydney. +

+The command line arguments were:
+ latex2html dgp.tex +

+The translation was initiated by to on 2014-12-19 +


+
+to +2014-12-19 +
+ + diff --git a/lib/ser.l.template b/lib/ser.l.template index 1c72081..4a42b61 100644 --- a/lib/ser.l.template +++ b/lib/ser.l.template @@ -11,19 +11,25 @@ PATTERN { if(yytext[yyleng-1]!='\n') {fprintf(stderr,"ser: pattern matches incomplete line\n"); exit(1);} n++; - sscanf(yytext,"%d %d",&start,&len); - yytext[yyleng-1]='\0'; + if( sscanf(yytext,"%d %d",&start,&len) != 2 ) {start=-1; len=-1;}; + yytext[yyleng-1]='\0'; if(tmp=strrchr(yytext,'\n')) { lastseg=tmp+1; - sscanf(lastseg,"%d %d", &end, &len); + if( sscanf(lastseg,"%d %d",&end,&len) != 2 ) {start=-1; len=-1;}; } else end=start; yytext[yyleng-1]='\n'; - printf("%04d 00 BOM * ser:%d\n",start,n); + if(start >= 0 && end >=0) + printf("%04d 00 BOM * ser:%d\n",start,n); + else + printf("BOM * ser:%d\n",n); ECHO; - printf("%04d 00 EOM * ser:%d\n",end+len,n); + if(start>=0 && end >=0) + printf("%04d 00 EOM * ser:%d\n",end+len,n); + else + printf("EOM * ser:%d\n",n); } diff --git a/share/Makefile b/share/Makefile index 0a3c840..4226e67 100644 --- a/share/Makefile +++ b/share/Makefile @@ -15,7 +15,7 @@ gram.dgp: gram.dgc .PHONY: install -install: +install: install-dictionaries .PHONY: install-grammar install-grammar: @@ -26,31 +26,31 @@ install-grammar: install-dictionaries: ifdef LANG_DIR install -d $(LANG_DIR)/pl_PL.ISO-8859-2 - install -d $(LANG_DIR)/pl_PL.UTF-8 + # install -d $(LANG_DIR)/pl_PL.UTF-8 install -m 0644 pl_PL.ISO-8859-2/cor.bin $(LANG_DIR)/pl_PL.ISO-8859-2 install -m 0644 pl_PL.ISO-8859-2/gue.bin $(LANG_DIR)/pl_PL.ISO-8859-2 install -m 0644 pl_PL.ISO-8859-2/lem.bin $(LANG_DIR)/pl_PL.ISO-8859-2 - install -m 0644 pl_PL.ISO-8859-2/lem.fst $(LANG_DIR)/pl_PL.ISO-8859-2 + # install -m 0644 pl_PL.ISO-8859-2/lem.fst $(LANG_DIR)/pl_PL.ISO-8859-2 install -m 0644 pl_PL.ISO-8859-2/lem.cats $(LANG_DIR)/pl_PL.ISO-8859-2 install -m 0644 pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym $(LANG_DIR)/pl_PL.ISO-8859-2 - install -m 0644 pl_PL.UTF-8/lem.bin $(LANG_DIR)/pl_PL.UTF-8 + # install -m 0644 pl_PL.UTF-8/lem.bin 
$(LANG_DIR)/pl_PL.UTF-8 install -m 0644 weights.kor $(LANG_DIR) endif .PHONY: uninstall uninstall: ifdef LANG_DIR - rm $(LANG_DIR)/weights.kor - rm $(LANG_DIR)/gram.* - rm $(LANG_DIR)/pl_PL.UTF-8/lem.bin - rm $(LANG_DIR)/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym - rm $(LANG_DIR)/pl_PL.ISO-8859-2/lem.cats - rm $(LANG_DIR)/pl_PL.ISO-8859-2/lem.bin - rm $(LANG_DIR)/pl_PL.ISO-8859-2/lem.fst - rm $(LANG_DIR)/pl_PL.ISO-8859-2/gue.bin - rm $(LANG_DIR)/pl_PL.ISO-8859-2/cor.bin + rm -f $(LANG_DIR)/weights.kor + rm -f $(LANG_DIR)/gram.* + rm -f $(LANG_DIR)/pl_PL.UTF-8/lem.bin + rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/pl_PL.ISO-8859-2.sym + rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/lem.cats + rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/lem.bin + # rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/lem.fst + rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/gue.bin + rm -f $(LANG_DIR)/pl_PL.ISO-8859-2/cor.bin rmdir $(LANG_DIR)/pl_PL.ISO-8859-2 - rmdir $(LANG_DIR)/pl_PL.UTF-8 + # rmdir $(LANG_DIR)/pl_PL.UTF-8 endif diff --git a/share/pl_PL.ISO-8859-2/Makefile b/share/pl_PL.ISO-8859-2/Makefile index fae3177..b69567c 100644 --- a/share/pl_PL.ISO-8859-2/Makefile +++ b/share/pl_PL.ISO-8859-2/Makefile @@ -1,9 +1,9 @@ include ../../config.mak -#TARGETS = lem.bin lem.cats cor.bin gue.bin +TARGETS = lem.bin lem.cats cor.bin gue.bin .PHONY: all -all: $(TARGETS) +all: # ------------------------------------------------------------------ # main section @@ -21,4 +21,4 @@ lem.cats: lem.dic .PHONY: clean clean: - rm -f lem.bin lem.fst lem.cats + rm -f lem.fst lem.cats diff --git a/src/compdic/Makefile b/src/compdic/Makefile index 07120a5..863b5d8 100644 --- a/src/compdic/Makefile +++ b/src/compdic/Makefile @@ -21,7 +21,6 @@ ifdef BIN_DIR install -m 0755 compdic-dic-to-fst $(BIN_DIR) install -m 0755 compdic-dic-to-cats $(BIN_DIR) install -m 0755 compdic-fst-to-bin $(BIN_DIR) - install -m 0755 canonize $(BIN_DIR) install -m 0755 fsm2aut $(BIN_DIR) install -m 0755 aut2fsa $(BIN_DIR) diff --git a/src/compdic/canonize b/src/compdic/canonize new file 
mode 100755 index 0000000..f9bd7b3 --- /dev/null +++ b/src/compdic/canonize @@ -0,0 +1,50 @@ +#!/usr/bin/perl + +#package: UAM TExt Tools +#component: canonize +#version: 1.0 +#author: Tomasz Obrebski + +use lib "/usr/local/lib/utt"; +use lib "$ENV{'HOME'}/.local/lib/utt"; + +use strict; +use Getopt::Long; +use attr; + + +my $help; + +GetOptions("help|h" => \$help); + +if($help) +{ + print <<'END' + +Transforms syntactic categories to their canonical form. + +Usage: canonize + +Options: + --help -h Help. + +END +; + exit 0; +} + +#$|=1; + +my %tra; + +while(<>) +{ + s/$attr::pos_re\/$attr::avlist_re/trans($&)/ge; + print; +} + +sub trans +{ + my $cat=shift; + exists($tra{$cat}) ? $tra{$cat} : ( $tra{$cat} = attr::canonize $cat ); +} diff --git a/src/dgp/cmdline.cc b/src/dgp/cmdline.cc deleted file mode 100644 index af61620..0000000 --- a/src/dgp/cmdline.cc +++ /dev/null @@ -1,1556 +0,0 @@ -/* - File autogenerated by gengetopt version 2.22.6 - generated with the following command: - gengetopt -i cmdline.ggo --c-extension=cc --conf-parser - - The developers of gengetopt consider the fixed text that goes in all - gengetopt output files to be in the public domain: - we make no copyright claims on it. -*/ - -/* If we use autoconf. 
*/ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include - -#ifndef FIX_UNUSED -#define FIX_UNUSED(X) (void) (X) /* avoid warnings for unused params */ -#endif - -#include - -#include "cmdline.h" - -const char *gengetopt_args_info_purpose = ""; - -const char *gengetopt_args_info_usage = "Usage: dgp [OPTIONS]..."; - -const char *gengetopt_args_info_versiontext = ""; - -const char *gengetopt_args_info_description = ""; - -const char *gengetopt_args_info_full_help[] = { - " -h, --help Print help and exit", - " --full-help Print help, including hidden options, and exit", - " -V, --version Print version and exit", - " -g, --grammar=filename Grammar file", - " -l, --long Long output (default=off)", - " -d, --debug Debug mode. (default=off)", - " --time Print parse time. (default=off)", - " --info=STRING Print info. \n h - heads d - dependents\n s - sets\n c - constraints n - node/arc counts\n (default=`h')", - " -f, --input=STRING Input file", - " -o, --output=STRING Output file", - " --only-fail Print only segments the program failed to process\n (default=off)", - " --no-fail Print only segments the program processed\n (default=off)", - " -c, --copy Copy succesfully processed segments to output\n (default=off)", - " -p, --process=STRING Process segments of this type only", - " -s, --select=STRING Select only segments containing this field", - " -S, --ignore=STRING Select only segments, which doesn't contain this\n field", - " -O, --output-field=STRING Output field name (default: program name)", - " -I, --input-field=STRING Input field name (default: the FORM field)", - " -i, --interactive Toggle interactive mode (default=off)", - " --config=FILENAME Configuration file", - " -1, --one-field Print all alternative results in one field\n (creates compact ambiguous annotation)\n (default=off)", - " --one-line Print annotation alternatives as additional fields\n in the same segment (default=off)", - " --language=STRING Language.", - 0 -}; - 
-static void -init_help_array(void) -{ - gengetopt_args_info_help[0] = gengetopt_args_info_full_help[0]; - gengetopt_args_info_help[1] = gengetopt_args_info_full_help[1]; - gengetopt_args_info_help[2] = gengetopt_args_info_full_help[2]; - gengetopt_args_info_help[3] = gengetopt_args_info_full_help[3]; - gengetopt_args_info_help[4] = gengetopt_args_info_full_help[4]; - gengetopt_args_info_help[5] = gengetopt_args_info_full_help[5]; - gengetopt_args_info_help[6] = gengetopt_args_info_full_help[6]; - gengetopt_args_info_help[7] = gengetopt_args_info_full_help[7]; - gengetopt_args_info_help[8] = gengetopt_args_info_full_help[8]; - gengetopt_args_info_help[9] = gengetopt_args_info_full_help[9]; - gengetopt_args_info_help[10] = gengetopt_args_info_full_help[12]; - gengetopt_args_info_help[11] = gengetopt_args_info_full_help[13]; - gengetopt_args_info_help[12] = gengetopt_args_info_full_help[14]; - gengetopt_args_info_help[13] = gengetopt_args_info_full_help[15]; - gengetopt_args_info_help[14] = gengetopt_args_info_full_help[16]; - gengetopt_args_info_help[15] = gengetopt_args_info_full_help[17]; - gengetopt_args_info_help[16] = gengetopt_args_info_full_help[18]; - gengetopt_args_info_help[17] = gengetopt_args_info_full_help[19]; - gengetopt_args_info_help[18] = gengetopt_args_info_full_help[20]; - gengetopt_args_info_help[19] = gengetopt_args_info_full_help[21]; - gengetopt_args_info_help[20] = gengetopt_args_info_full_help[22]; - gengetopt_args_info_help[21] = 0; - -} - -const char *gengetopt_args_info_help[22]; - -typedef enum {ARG_NO - , ARG_FLAG - , ARG_STRING -} cmdline_parser_arg_type; - -static -void clear_given (struct gengetopt_args_info *args_info); -static -void clear_args (struct gengetopt_args_info *args_info); - -static int -cmdline_parser_internal (int argc, char **argv, struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params, const char *additional_error); - -static int -cmdline_parser_required2 (struct gengetopt_args_info 
*args_info, const char *prog_name, const char *additional_error); -struct line_list -{ - char * string_arg; - struct line_list * next; -}; - -static struct line_list *cmd_line_list = 0; -static struct line_list *cmd_line_list_tmp = 0; - -static void -free_cmd_list(void) -{ - /* free the list of a previous call */ - if (cmd_line_list) - { - while (cmd_line_list) { - cmd_line_list_tmp = cmd_line_list; - cmd_line_list = cmd_line_list->next; - free (cmd_line_list_tmp->string_arg); - free (cmd_line_list_tmp); - } - } -} - - -static char * -gengetopt_strdup (const char *s); - -static -void clear_given (struct gengetopt_args_info *args_info) -{ - args_info->help_given = 0 ; - args_info->full_help_given = 0 ; - args_info->version_given = 0 ; - args_info->grammar_given = 0 ; - args_info->long_given = 0 ; - args_info->debug_given = 0 ; - args_info->time_given = 0 ; - args_info->info_given = 0 ; - args_info->input_given = 0 ; - args_info->output_given = 0 ; - args_info->only_fail_given = 0 ; - args_info->no_fail_given = 0 ; - args_info->copy_given = 0 ; - args_info->process_given = 0 ; - args_info->select_given = 0 ; - args_info->ignore_given = 0 ; - args_info->output_field_given = 0 ; - args_info->input_field_given = 0 ; - args_info->interactive_given = 0 ; - args_info->config_given = 0 ; - args_info->one_field_given = 0 ; - args_info->one_line_given = 0 ; - args_info->language_given = 0 ; -} - -static -void clear_args (struct gengetopt_args_info *args_info) -{ - FIX_UNUSED (args_info); - args_info->grammar_arg = NULL; - args_info->grammar_orig = NULL; - args_info->long_flag = 0; - args_info->debug_flag = 0; - args_info->time_flag = 0; - args_info->info_arg = gengetopt_strdup ("h"); - args_info->info_orig = NULL; - args_info->input_arg = NULL; - args_info->input_orig = NULL; - args_info->output_arg = NULL; - args_info->output_orig = NULL; - args_info->only_fail_flag = 0; - args_info->no_fail_flag = 0; - args_info->copy_flag = 0; - args_info->process_arg = NULL; - 
args_info->process_orig = NULL; - args_info->select_arg = NULL; - args_info->select_orig = NULL; - args_info->ignore_arg = NULL; - args_info->ignore_orig = NULL; - args_info->output_field_arg = NULL; - args_info->output_field_orig = NULL; - args_info->input_field_arg = NULL; - args_info->input_field_orig = NULL; - args_info->interactive_flag = 0; - args_info->config_arg = NULL; - args_info->config_orig = NULL; - args_info->one_field_flag = 0; - args_info->one_line_flag = 0; - args_info->language_arg = NULL; - args_info->language_orig = NULL; - -} - -static -void init_args_info(struct gengetopt_args_info *args_info) -{ - - init_help_array(); - args_info->help_help = gengetopt_args_info_full_help[0] ; - args_info->full_help_help = gengetopt_args_info_full_help[1] ; - args_info->version_help = gengetopt_args_info_full_help[2] ; - args_info->grammar_help = gengetopt_args_info_full_help[3] ; - args_info->long_help = gengetopt_args_info_full_help[4] ; - args_info->debug_help = gengetopt_args_info_full_help[5] ; - args_info->time_help = gengetopt_args_info_full_help[6] ; - args_info->info_help = gengetopt_args_info_full_help[7] ; - args_info->input_help = gengetopt_args_info_full_help[8] ; - args_info->output_help = gengetopt_args_info_full_help[9] ; - args_info->only_fail_help = gengetopt_args_info_full_help[10] ; - args_info->no_fail_help = gengetopt_args_info_full_help[11] ; - args_info->copy_help = gengetopt_args_info_full_help[12] ; - args_info->process_help = gengetopt_args_info_full_help[13] ; - args_info->process_min = 0; - args_info->process_max = 0; - args_info->select_help = gengetopt_args_info_full_help[14] ; - args_info->select_min = 0; - args_info->select_max = 0; - args_info->ignore_help = gengetopt_args_info_full_help[15] ; - args_info->ignore_min = 0; - args_info->ignore_max = 0; - args_info->output_field_help = gengetopt_args_info_full_help[16] ; - args_info->input_field_help = gengetopt_args_info_full_help[17] ; - args_info->input_field_min = 0; - 
args_info->input_field_max = 0; - args_info->interactive_help = gengetopt_args_info_full_help[18] ; - args_info->config_help = gengetopt_args_info_full_help[19] ; - args_info->one_field_help = gengetopt_args_info_full_help[20] ; - args_info->one_line_help = gengetopt_args_info_full_help[21] ; - args_info->language_help = gengetopt_args_info_full_help[22] ; - -} - -void -cmdline_parser_print_version (void) -{ - printf ("%s %s\n", - (strlen(CMDLINE_PARSER_PACKAGE_NAME) ? CMDLINE_PARSER_PACKAGE_NAME : CMDLINE_PARSER_PACKAGE), - CMDLINE_PARSER_VERSION); - - if (strlen(gengetopt_args_info_versiontext) > 0) - printf("\n%s\n", gengetopt_args_info_versiontext); -} - -static void print_help_common(void) { - cmdline_parser_print_version (); - - if (strlen(gengetopt_args_info_purpose) > 0) - printf("\n%s\n", gengetopt_args_info_purpose); - - if (strlen(gengetopt_args_info_usage) > 0) - printf("\n%s\n", gengetopt_args_info_usage); - - printf("\n"); - - if (strlen(gengetopt_args_info_description) > 0) - printf("%s\n\n", gengetopt_args_info_description); -} - -void -cmdline_parser_print_help (void) -{ - int i = 0; - print_help_common(); - while (gengetopt_args_info_help[i]) - printf("%s\n", gengetopt_args_info_help[i++]); -} - -void -cmdline_parser_print_full_help (void) -{ - int i = 0; - print_help_common(); - while (gengetopt_args_info_full_help[i]) - printf("%s\n", gengetopt_args_info_full_help[i++]); -} - -void -cmdline_parser_init (struct gengetopt_args_info *args_info) -{ - clear_given (args_info); - clear_args (args_info); - init_args_info (args_info); -} - -void -cmdline_parser_params_init(struct cmdline_parser_params *params) -{ - if (params) - { - params->override = 0; - params->initialize = 1; - params->check_required = 1; - params->check_ambiguity = 0; - params->print_errors = 1; - } -} - -struct cmdline_parser_params * -cmdline_parser_params_create(void) -{ - struct cmdline_parser_params *params = - (struct cmdline_parser_params *)malloc(sizeof(struct 
cmdline_parser_params)); - cmdline_parser_params_init(params); - return params; -} - -static void -free_string_field (char **s) -{ - if (*s) - { - free (*s); - *s = 0; - } -} - -/** @brief generic value variable */ -union generic_value { - char *string_arg; - const char *default_string_arg; -}; - -/** @brief holds temporary values for multiple options */ -struct generic_list -{ - union generic_value arg; - char *orig; - struct generic_list *next; -}; - -/** - * @brief add a node at the head of the list - */ -static void add_node(struct generic_list **list) { - struct generic_list *new_node = (struct generic_list *) malloc (sizeof (struct generic_list)); - new_node->next = *list; - *list = new_node; - new_node->arg.string_arg = 0; - new_node->orig = 0; -} - - -static void -free_multiple_string_field(unsigned int len, char ***arg, char ***orig) -{ - unsigned int i; - if (*arg) { - for (i = 0; i < len; ++i) - { - free_string_field(&((*arg)[i])); - free_string_field(&((*orig)[i])); - } - free_string_field(&((*arg)[0])); /* free default string */ - - free (*arg); - *arg = 0; - free (*orig); - *orig = 0; - } -} - -static void -cmdline_parser_release (struct gengetopt_args_info *args_info) -{ - - free_string_field (&(args_info->grammar_arg)); - free_string_field (&(args_info->grammar_orig)); - free_string_field (&(args_info->info_arg)); - free_string_field (&(args_info->info_orig)); - free_string_field (&(args_info->input_arg)); - free_string_field (&(args_info->input_orig)); - free_string_field (&(args_info->output_arg)); - free_string_field (&(args_info->output_orig)); - free_multiple_string_field (args_info->process_given, &(args_info->process_arg), &(args_info->process_orig)); - free_multiple_string_field (args_info->select_given, &(args_info->select_arg), &(args_info->select_orig)); - free_multiple_string_field (args_info->ignore_given, &(args_info->ignore_arg), &(args_info->ignore_orig)); - free_string_field (&(args_info->output_field_arg)); - free_string_field 
(&(args_info->output_field_orig)); - free_multiple_string_field (args_info->input_field_given, &(args_info->input_field_arg), &(args_info->input_field_orig)); - free_string_field (&(args_info->config_arg)); - free_string_field (&(args_info->config_orig)); - free_string_field (&(args_info->language_arg)); - free_string_field (&(args_info->language_orig)); - - - - clear_given (args_info); -} - - -static void -write_into_file(FILE *outfile, const char *opt, const char *arg, const char *values[]) -{ - FIX_UNUSED (values); - if (arg) { - fprintf(outfile, "%s=\"%s\"\n", opt, arg); - } else { - fprintf(outfile, "%s\n", opt); - } -} - -static void -write_multiple_into_file(FILE *outfile, int len, const char *opt, char **arg, const char *values[]) -{ - int i; - - for (i = 0; i < len; ++i) - write_into_file(outfile, opt, (arg ? arg[i] : 0), values); -} - -int -cmdline_parser_dump(FILE *outfile, struct gengetopt_args_info *args_info) -{ - int i = 0; - - if (!outfile) - { - fprintf (stderr, "%s: cannot dump options to stream\n", CMDLINE_PARSER_PACKAGE); - return EXIT_FAILURE; - } - - if (args_info->help_given) - write_into_file(outfile, "help", 0, 0 ); - if (args_info->full_help_given) - write_into_file(outfile, "full-help", 0, 0 ); - if (args_info->version_given) - write_into_file(outfile, "version", 0, 0 ); - if (args_info->grammar_given) - write_into_file(outfile, "grammar", args_info->grammar_orig, 0); - if (args_info->long_given) - write_into_file(outfile, "long", 0, 0 ); - if (args_info->debug_given) - write_into_file(outfile, "debug", 0, 0 ); - if (args_info->time_given) - write_into_file(outfile, "time", 0, 0 ); - if (args_info->info_given) - write_into_file(outfile, "info", args_info->info_orig, 0); - if (args_info->input_given) - write_into_file(outfile, "input", args_info->input_orig, 0); - if (args_info->output_given) - write_into_file(outfile, "output", args_info->output_orig, 0); - if (args_info->only_fail_given) - write_into_file(outfile, "only-fail", 0, 0 ); - 
if (args_info->no_fail_given) - write_into_file(outfile, "no-fail", 0, 0 ); - if (args_info->copy_given) - write_into_file(outfile, "copy", 0, 0 ); - write_multiple_into_file(outfile, args_info->process_given, "process", args_info->process_orig, 0); - write_multiple_into_file(outfile, args_info->select_given, "select", args_info->select_orig, 0); - write_multiple_into_file(outfile, args_info->ignore_given, "ignore", args_info->ignore_orig, 0); - if (args_info->output_field_given) - write_into_file(outfile, "output-field", args_info->output_field_orig, 0); - write_multiple_into_file(outfile, args_info->input_field_given, "input-field", args_info->input_field_orig, 0); - if (args_info->interactive_given) - write_into_file(outfile, "interactive", 0, 0 ); - if (args_info->config_given) - write_into_file(outfile, "config", args_info->config_orig, 0); - if (args_info->one_field_given) - write_into_file(outfile, "one-field", 0, 0 ); - if (args_info->one_line_given) - write_into_file(outfile, "one-line", 0, 0 ); - if (args_info->language_given) - write_into_file(outfile, "language", args_info->language_orig, 0); - - - i = EXIT_SUCCESS; - return i; -} - -int -cmdline_parser_file_save(const char *filename, struct gengetopt_args_info *args_info) -{ - FILE *outfile; - int i = 0; - - outfile = fopen(filename, "w"); - - if (!outfile) - { - fprintf (stderr, "%s: cannot open file for writing: %s\n", CMDLINE_PARSER_PACKAGE, filename); - return EXIT_FAILURE; - } - - i = cmdline_parser_dump(outfile, args_info); - fclose (outfile); - - return i; -} - -void -cmdline_parser_free (struct gengetopt_args_info *args_info) -{ - cmdline_parser_release (args_info); -} - -/** @brief replacement of strdup, which is not standard */ -char * -gengetopt_strdup (const char *s) -{ - char *result = 0; - if (!s) - return result; - - result = (char*)malloc(strlen(s) + 1); - if (result == (char*)0) - return (char*)0; - strcpy(result, s); - return result; -} - -static char * -get_multiple_arg_token(const 
char *arg) -{ - const char *tok; - char *ret; - size_t len, num_of_escape, i, j; - - if (!arg) - return 0; - - tok = strchr (arg, ','); - num_of_escape = 0; - - /* make sure it is not escaped */ - while (tok) - { - if (*(tok-1) == '\\') - { - /* find the next one */ - tok = strchr (tok+1, ','); - ++num_of_escape; - } - else - break; - } - - if (tok) - len = (size_t)(tok - arg + 1); - else - len = strlen (arg) + 1; - - len -= num_of_escape; - - ret = (char *) malloc (len); - - i = 0; - j = 0; - while (arg[i] && (j < len-1)) - { - if (arg[i] == '\\' && - arg[ i + 1 ] && - arg[ i + 1 ] == ',') - ++i; - - ret[j++] = arg[i++]; - } - - ret[len-1] = '\0'; - - return ret; -} - -static const char * -get_multiple_arg_token_next(const char *arg) -{ - const char *tok; - - if (!arg) - return 0; - - tok = strchr (arg, ','); - - /* make sure it is not escaped */ - while (tok) - { - if (*(tok-1) == '\\') - { - /* find the next one */ - tok = strchr (tok+1, ','); - } - else - break; - } - - if (! tok || strlen(tok) == 1) - return 0; - - return tok+1; -} - -static int -check_multiple_option_occurrences(const char *prog_name, unsigned int option_given, unsigned int min, unsigned int max, const char *option_desc); - -int -check_multiple_option_occurrences(const char *prog_name, unsigned int option_given, unsigned int min, unsigned int max, const char *option_desc) -{ - int error_occurred = 0; - - if (option_given && (min > 0 || max > 0)) - { - if (min > 0 && max > 0) - { - if (min == max) - { - /* specific occurrences */ - if (option_given != (unsigned int) min) - { - fprintf (stderr, "%s: %s option occurrences must be %d\n", - prog_name, option_desc, min); - error_occurred = 1; - } - } - else if (option_given < (unsigned int) min - || option_given > (unsigned int) max) - { - /* range occurrences */ - fprintf (stderr, "%s: %s option occurrences must be between %d and %d\n", - prog_name, option_desc, min, max); - error_occurred = 1; - } - } - else if (min > 0) - { - /* at least check 
*/ - if (option_given < min) - { - fprintf (stderr, "%s: %s option occurrences must be at least %d\n", - prog_name, option_desc, min); - error_occurred = 1; - } - } - else if (max > 0) - { - /* at most check */ - if (option_given > max) - { - fprintf (stderr, "%s: %s option occurrences must be at most %d\n", - prog_name, option_desc, max); - error_occurred = 1; - } - } - } - - return error_occurred; -} -int -cmdline_parser (int argc, char **argv, struct gengetopt_args_info *args_info) -{ - return cmdline_parser2 (argc, argv, args_info, 0, 1, 1); -} - -int -cmdline_parser_ext (int argc, char **argv, struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params) -{ - int result; - result = cmdline_parser_internal (argc, argv, args_info, params, 0); - - if (result == EXIT_FAILURE) - { - cmdline_parser_free (args_info); - exit (EXIT_FAILURE); - } - - return result; -} - -int -cmdline_parser2 (int argc, char **argv, struct gengetopt_args_info *args_info, int override, int initialize, int check_required) -{ - int result; - struct cmdline_parser_params params; - - params.override = override; - params.initialize = initialize; - params.check_required = check_required; - params.check_ambiguity = 0; - params.print_errors = 1; - - result = cmdline_parser_internal (argc, argv, args_info, ¶ms, 0); - - if (result == EXIT_FAILURE) - { - cmdline_parser_free (args_info); - exit (EXIT_FAILURE); - } - - return result; -} - -int -cmdline_parser_required (struct gengetopt_args_info *args_info, const char *prog_name) -{ - int result = EXIT_SUCCESS; - - if (cmdline_parser_required2(args_info, prog_name, 0) > 0) - result = EXIT_FAILURE; - - if (result == EXIT_FAILURE) - { - cmdline_parser_free (args_info); - exit (EXIT_FAILURE); - } - - return result; -} - -int -cmdline_parser_required2 (struct gengetopt_args_info *args_info, const char *prog_name, const char *additional_error) -{ - int error_occurred = 0; - FIX_UNUSED (additional_error); - - /* checks for required options 
*/ - if (check_multiple_option_occurrences(prog_name, args_info->process_given, args_info->process_min, args_info->process_max, "'--process' ('-p')")) - error_occurred = 1; - - if (check_multiple_option_occurrences(prog_name, args_info->select_given, args_info->select_min, args_info->select_max, "'--select' ('-s')")) - error_occurred = 1; - - if (check_multiple_option_occurrences(prog_name, args_info->ignore_given, args_info->ignore_min, args_info->ignore_max, "'--ignore' ('-S')")) - error_occurred = 1; - - if (check_multiple_option_occurrences(prog_name, args_info->input_field_given, args_info->input_field_min, args_info->input_field_max, "'--input-field' ('-I')")) - error_occurred = 1; - - - /* checks for dependences among options */ - - return error_occurred; -} - - -static char *package_name = 0; - -/** - * @brief updates an option - * @param field the generic pointer to the field to update - * @param orig_field the pointer to the orig field - * @param field_given the pointer to the number of occurrence of this option - * @param prev_given the pointer to the number of occurrence already seen - * @param value the argument for this option (if null no arg was specified) - * @param possible_values the possible values for this option (if specified) - * @param default_value the default value (in case the option only accepts fixed values) - * @param arg_type the type of this option - * @param check_ambiguity @see cmdline_parser_params.check_ambiguity - * @param override @see cmdline_parser_params.override - * @param no_free whether to free a possible previous value - * @param multiple_option whether this is a multiple option - * @param long_opt the corresponding long option - * @param short_opt the corresponding short option (or '-' if none) - * @param additional_error possible further error specification - */ -static -int update_arg(void *field, char **orig_field, - unsigned int *field_given, unsigned int *prev_given, - char *value, const char *possible_values[], - 
const char *default_value, - cmdline_parser_arg_type arg_type, - int check_ambiguity, int override, - int no_free, int multiple_option, - const char *long_opt, char short_opt, - const char *additional_error) -{ - char *stop_char = 0; - const char *val = value; - int found; - char **string_field; - FIX_UNUSED (field); - - stop_char = 0; - found = 0; - - if (!multiple_option && prev_given && (*prev_given || (check_ambiguity && *field_given))) - { - if (short_opt != '-') - fprintf (stderr, "%s: `--%s' (`-%c') option given more than once%s\n", - package_name, long_opt, short_opt, - (additional_error ? additional_error : "")); - else - fprintf (stderr, "%s: `--%s' option given more than once%s\n", - package_name, long_opt, - (additional_error ? additional_error : "")); - return 1; /* failure */ - } - - FIX_UNUSED (default_value); - - if (field_given && *field_given && ! override) - return 0; - if (prev_given) - (*prev_given)++; - if (field_given) - (*field_given)++; - if (possible_values) - val = possible_values[found]; - - switch(arg_type) { - case ARG_FLAG: - *((int *)field) = !*((int *)field); - break; - case ARG_STRING: - if (val) { - string_field = (char **)field; - if (!no_free && *string_field) - free (*string_field); /* free previous string */ - *string_field = gengetopt_strdup (val); - } - break; - default: - break; - }; - - - /* store the original value */ - switch(arg_type) { - case ARG_NO: - case ARG_FLAG: - break; - default: - if (value && orig_field) { - if (no_free) { - *orig_field = value; - } else { - if (*orig_field) - free (*orig_field); /* free previous string */ - *orig_field = gengetopt_strdup (value); - } - } - }; - - return 0; /* OK */ -} - -/** - * @brief store information about a multiple option in a temporary list - * @param list where to (temporarily) store multiple options - */ -static -int update_multiple_arg_temp(struct generic_list **list, - unsigned int *prev_given, const char *val, - const char *possible_values[], const char 
*default_value, - cmdline_parser_arg_type arg_type, - const char *long_opt, char short_opt, - const char *additional_error) -{ - /* store single arguments */ - char *multi_token; - const char *multi_next; - - if (arg_type == ARG_NO) { - (*prev_given)++; - return 0; /* OK */ - } - - multi_token = get_multiple_arg_token(val); - multi_next = get_multiple_arg_token_next (val); - - while (1) - { - add_node (list); - if (update_arg((void *)&((*list)->arg), &((*list)->orig), 0, - prev_given, multi_token, possible_values, default_value, - arg_type, 0, 1, 1, 1, long_opt, short_opt, additional_error)) { - if (multi_token) free(multi_token); - return 1; /* failure */ - } - - if (multi_next) - { - multi_token = get_multiple_arg_token(multi_next); - multi_next = get_multiple_arg_token_next (multi_next); - } - else - break; - } - - return 0; /* OK */ -} - -/** - * @brief free the passed list (including possible string argument) - */ -static -void free_list(struct generic_list *list, short string_arg) -{ - if (list) { - struct generic_list *tmp; - while (list) - { - tmp = list; - if (string_arg && list->arg.string_arg) - free (list->arg.string_arg); - if (list->orig) - free (list->orig); - list = list->next; - free (tmp); - } - } -} - -/** - * @brief updates a multiple option starting from the passed list - */ -static -void update_multiple_arg(void *field, char ***orig_field, - unsigned int field_given, unsigned int prev_given, union generic_value *default_value, - cmdline_parser_arg_type arg_type, - struct generic_list *list) -{ - int i; - struct generic_list *tmp; - - if (prev_given && list) { - *orig_field = (char **) realloc (*orig_field, (field_given + prev_given) * sizeof (char *)); - - switch(arg_type) { - case ARG_STRING: - *((char ***)field) = (char **)realloc (*((char ***)field), (field_given + prev_given) * sizeof (char *)); break; - default: - break; - }; - - for (i = (prev_given - 1); i >= 0; --i) - { - tmp = list; - - switch(arg_type) { - case ARG_STRING: - (*((char 
***)field))[i + field_given] = tmp->arg.string_arg; break; - default: - break; - } - (*orig_field) [i + field_given] = list->orig; - list = list->next; - free (tmp); - } - } else { /* set the default value */ - if (default_value && ! field_given) { - switch(arg_type) { - case ARG_STRING: - if (! *((char ***)field)) { - *((char ***)field) = (char **)malloc (sizeof (char *)); - (*((char ***)field))[0] = gengetopt_strdup(default_value->string_arg); - } - break; - default: break; - } - if (!(*orig_field)) { - *orig_field = (char **) malloc (sizeof (char *)); - (*orig_field)[0] = 0; - } - } - } -} - -int -cmdline_parser_internal ( - int argc, char **argv, struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params, const char *additional_error) -{ - int c; /* Character of the parsed option. */ - - struct generic_list * process_list = NULL; - struct generic_list * select_list = NULL; - struct generic_list * ignore_list = NULL; - struct generic_list * input_field_list = NULL; - int error_occurred = 0; - struct gengetopt_args_info local_args_info; - - int override; - int initialize; - int check_required; - int check_ambiguity; - - package_name = argv[0]; - - override = params->override; - initialize = params->initialize; - check_required = params->check_required; - check_ambiguity = params->check_ambiguity; - - if (initialize) - cmdline_parser_init (args_info); - - cmdline_parser_init (&local_args_info); - - optarg = 0; - optind = 0; - opterr = params->print_errors; - optopt = '?'; - - while (1) - { - int option_index = 0; - - static struct option long_options[] = { - { "help", 0, NULL, 'h' }, - { "full-help", 0, NULL, 0 }, - { "version", 0, NULL, 'V' }, - { "grammar", 1, NULL, 'g' }, - { "long", 0, NULL, 'l' }, - { "debug", 0, NULL, 'd' }, - { "time", 0, NULL, 0 }, - { "info", 1, NULL, 0 }, - { "input", 1, NULL, 'f' }, - { "output", 1, NULL, 'o' }, - { "only-fail", 0, NULL, 0 }, - { "no-fail", 0, NULL, 0 }, - { "copy", 0, NULL, 'c' }, - { "process", 1, 
NULL, 'p' }, - { "select", 1, NULL, 's' }, - { "ignore", 1, NULL, 'S' }, - { "output-field", 1, NULL, 'O' }, - { "input-field", 1, NULL, 'I' }, - { "interactive", 0, NULL, 'i' }, - { "config", 1, NULL, 0 }, - { "one-field", 0, NULL, '1' }, - { "one-line", 0, NULL, 0 }, - { "language", 1, NULL, 0 }, - { 0, 0, 0, 0 } - }; - - c = getopt_long (argc, argv, "hVg:ldf:o:cp:s:S:O:I:i1", long_options, &option_index); - - if (c == -1) break; /* Exit from `while (1)' loop. */ - - switch (c) - { - case 'h': /* Print help and exit. */ - cmdline_parser_print_help (); - cmdline_parser_free (&local_args_info); - exit (EXIT_SUCCESS); - - case 'V': /* Print version and exit. */ - cmdline_parser_print_version (); - cmdline_parser_free (&local_args_info); - exit (EXIT_SUCCESS); - - case 'g': /* Grammar file. */ - - - if (update_arg( (void *)&(args_info->grammar_arg), - &(args_info->grammar_orig), &(args_info->grammar_given), - &(local_args_info.grammar_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "grammar", 'g', - additional_error)) - goto failure; - - break; - case 'l': /* Long output. */ - - - if (update_arg((void *)&(args_info->long_flag), 0, &(args_info->long_given), - &(local_args_info.long_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "long", 'l', - additional_error)) - goto failure; - - break; - case 'd': /* Debug mode.. */ - - - if (update_arg((void *)&(args_info->debug_flag), 0, &(args_info->debug_given), - &(local_args_info.debug_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "debug", 'd', - additional_error)) - goto failure; - - break; - case 'f': /* Input file. */ - - - if (update_arg( (void *)&(args_info->input_arg), - &(args_info->input_orig), &(args_info->input_given), - &(local_args_info.input_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "input", 'f', - additional_error)) - goto failure; - - break; - case 'o': /* Output file. 
*/ - - - if (update_arg( (void *)&(args_info->output_arg), - &(args_info->output_orig), &(args_info->output_given), - &(local_args_info.output_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "output", 'o', - additional_error)) - goto failure; - - break; - case 'c': /* Copy succesfully processed segments to output. */ - - - if (update_arg((void *)&(args_info->copy_flag), 0, &(args_info->copy_given), - &(local_args_info.copy_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "copy", 'c', - additional_error)) - goto failure; - - break; - case 'p': /* Process segments of this type only. */ - - if (update_multiple_arg_temp(&process_list, - &(local_args_info.process_given), optarg, 0, 0, ARG_STRING, - "process", 'p', - additional_error)) - goto failure; - - break; - case 's': /* Select only segments containing this field. */ - - if (update_multiple_arg_temp(&select_list, - &(local_args_info.select_given), optarg, 0, 0, ARG_STRING, - "select", 's', - additional_error)) - goto failure; - - break; - case 'S': /* Select only segments, which doesn't contain this field. */ - - if (update_multiple_arg_temp(&ignore_list, - &(local_args_info.ignore_given), optarg, 0, 0, ARG_STRING, - "ignore", 'S', - additional_error)) - goto failure; - - break; - case 'O': /* Output field name (default: program name). */ - - - if (update_arg( (void *)&(args_info->output_field_arg), - &(args_info->output_field_orig), &(args_info->output_field_given), - &(local_args_info.output_field_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "output-field", 'O', - additional_error)) - goto failure; - - break; - case 'I': /* Input field name (default: the FORM field). */ - - if (update_multiple_arg_temp(&input_field_list, - &(local_args_info.input_field_given), optarg, 0, 0, ARG_STRING, - "input-field", 'I', - additional_error)) - goto failure; - - break; - case 'i': /* Toggle interactive mode. 
*/ - - - if (update_arg((void *)&(args_info->interactive_flag), 0, &(args_info->interactive_given), - &(local_args_info.interactive_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "interactive", 'i', - additional_error)) - goto failure; - - break; - case '1': /* Print all alternative results in one field (creates compact ambiguous annotation). */ - - - if (update_arg((void *)&(args_info->one_field_flag), 0, &(args_info->one_field_given), - &(local_args_info.one_field_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "one-field", '1', - additional_error)) - goto failure; - - break; - - case 0: /* Long option with no short option */ - if (strcmp (long_options[option_index].name, "full-help") == 0) { - cmdline_parser_print_full_help (); - cmdline_parser_free (&local_args_info); - exit (EXIT_SUCCESS); - } - - /* Print parse time.. */ - if (strcmp (long_options[option_index].name, "time") == 0) - { - - - if (update_arg((void *)&(args_info->time_flag), 0, &(args_info->time_given), - &(local_args_info.time_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "time", '-', - additional_error)) - goto failure; - - } - /* Print info. - h - heads d - dependents - s - sets - c - constraints n - node/arc counts. */ - else if (strcmp (long_options[option_index].name, "info") == 0) - { - - - if (update_arg( (void *)&(args_info->info_arg), - &(args_info->info_orig), &(args_info->info_given), - &(local_args_info.info_given), optarg, 0, "h", ARG_STRING, - check_ambiguity, override, 0, 0, - "info", '-', - additional_error)) - goto failure; - - } - /* Print only segments the program failed to process. 
*/ - else if (strcmp (long_options[option_index].name, "only-fail") == 0) - { - - - if (update_arg((void *)&(args_info->only_fail_flag), 0, &(args_info->only_fail_given), - &(local_args_info.only_fail_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "only-fail", '-', - additional_error)) - goto failure; - - } - /* Print only segments the program processed. */ - else if (strcmp (long_options[option_index].name, "no-fail") == 0) - { - - - if (update_arg((void *)&(args_info->no_fail_flag), 0, &(args_info->no_fail_given), - &(local_args_info.no_fail_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "no-fail", '-', - additional_error)) - goto failure; - - } - /* Configuration file. */ - else if (strcmp (long_options[option_index].name, "config") == 0) - { - - - if (update_arg( (void *)&(args_info->config_arg), - &(args_info->config_orig), &(args_info->config_given), - &(local_args_info.config_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "config", '-', - additional_error)) - goto failure; - - } - /* Print annotation alternatives as additional fields in the same segment. */ - else if (strcmp (long_options[option_index].name, "one-line") == 0) - { - - - if (update_arg((void *)&(args_info->one_line_flag), 0, &(args_info->one_line_given), - &(local_args_info.one_line_given), optarg, 0, 0, ARG_FLAG, - check_ambiguity, override, 1, 0, "one-line", '-', - additional_error)) - goto failure; - - } - /* Language.. */ - else if (strcmp (long_options[option_index].name, "language") == 0) - { - - - if (update_arg( (void *)&(args_info->language_arg), - &(args_info->language_orig), &(args_info->language_given), - &(local_args_info.language_given), optarg, 0, 0, ARG_STRING, - check_ambiguity, override, 0, 0, - "language", '-', - additional_error)) - goto failure; - - } - - break; - case '?': /* Invalid option. */ - /* `getopt_long' already printed an error message. */ - goto failure; - - default: /* bug: option not considered. 
*/ - fprintf (stderr, "%s: option unknown: %c%s\n", CMDLINE_PARSER_PACKAGE, c, (additional_error ? additional_error : "")); - abort (); - } /* switch */ - } /* while */ - - - update_multiple_arg((void *)&(args_info->process_arg), - &(args_info->process_orig), args_info->process_given, - local_args_info.process_given, 0, - ARG_STRING, process_list); - update_multiple_arg((void *)&(args_info->select_arg), - &(args_info->select_orig), args_info->select_given, - local_args_info.select_given, 0, - ARG_STRING, select_list); - update_multiple_arg((void *)&(args_info->ignore_arg), - &(args_info->ignore_orig), args_info->ignore_given, - local_args_info.ignore_given, 0, - ARG_STRING, ignore_list); - update_multiple_arg((void *)&(args_info->input_field_arg), - &(args_info->input_field_orig), args_info->input_field_given, - local_args_info.input_field_given, 0, - ARG_STRING, input_field_list); - - args_info->process_given += local_args_info.process_given; - local_args_info.process_given = 0; - args_info->select_given += local_args_info.select_given; - local_args_info.select_given = 0; - args_info->ignore_given += local_args_info.ignore_given; - local_args_info.ignore_given = 0; - args_info->input_field_given += local_args_info.input_field_given; - local_args_info.input_field_given = 0; - - if (check_required) - { - error_occurred += cmdline_parser_required2 (args_info, argv[0], additional_error); - } - - cmdline_parser_release (&local_args_info); - - if ( error_occurred ) - return (EXIT_FAILURE); - - return 0; - -failure: - free_list (process_list, 1 ); - free_list (select_list, 1 ); - free_list (ignore_list, 1 ); - free_list (input_field_list, 1 ); - - cmdline_parser_release (&local_args_info); - return (EXIT_FAILURE); -} - -#ifndef CONFIG_FILE_LINE_SIZE -#define CONFIG_FILE_LINE_SIZE 2048 -#endif -#define ADDITIONAL_ERROR " in configuration file " - -#define CONFIG_FILE_LINE_BUFFER_SIZE (CONFIG_FILE_LINE_SIZE+3) -/* 3 is for "--" and "=" */ - -static int 
-_cmdline_parser_configfile (const char *filename, int *my_argc) -{ - FILE* file; - char my_argv[CONFIG_FILE_LINE_BUFFER_SIZE+1]; - char linebuf[CONFIG_FILE_LINE_SIZE]; - int line_num = 0; - int result = 0, equal; - char *fopt, *farg; - char *str_index; - size_t len, next_token; - char delimiter; - - if ((file = fopen(filename, "r")) == 0) - { - fprintf (stderr, "%s: Error opening configuration file '%s'\n", - CMDLINE_PARSER_PACKAGE, filename); - return EXIT_FAILURE; - } - - while ((fgets(linebuf, CONFIG_FILE_LINE_SIZE, file)) != 0) - { - ++line_num; - my_argv[0] = '\0'; - len = strlen(linebuf); - if (len > (CONFIG_FILE_LINE_BUFFER_SIZE-1)) - { - fprintf (stderr, "%s:%s:%d: Line too long in configuration file\n", - CMDLINE_PARSER_PACKAGE, filename, line_num); - result = EXIT_FAILURE; - break; - } - - /* find first non-whitespace character in the line */ - next_token = strspn (linebuf, " \t\r\n"); - str_index = linebuf + next_token; - - if ( str_index[0] == '\0' || str_index[0] == '#') - continue; /* empty line or comment line is skipped */ - - fopt = str_index; - - /* truncate fopt at the end of the first non-valid character */ - next_token = strcspn (fopt, " \t\r\n="); - - if (fopt[next_token] == '\0') /* the line is over */ - { - farg = 0; - equal = 0; - goto noarg; - } - - /* remember if equal sign is present */ - equal = (fopt[next_token] == '='); - fopt[next_token++] = '\0'; - - /* advance pointers to the next token after the end of fopt */ - next_token += strspn (fopt + next_token, " \t\r\n"); - - /* check for the presence of equal sign, and if so, skip it */ - if ( !equal ) - if ((equal = (fopt[next_token] == '='))) - { - next_token++; - next_token += strspn (fopt + next_token, " \t\r\n"); - } - str_index += next_token; - - /* find argument */ - farg = str_index; - if ( farg[0] == '\"' || farg[0] == '\'' ) - { /* quoted argument */ - str_index = strchr (++farg, str_index[0] ); /* skip opening quote */ - if (! 
str_index) - { - fprintf - (stderr, - "%s:%s:%d: unterminated string in configuration file\n", - CMDLINE_PARSER_PACKAGE, filename, line_num); - result = EXIT_FAILURE; - break; - } - } - else - { /* read up the remaining part up to a delimiter */ - next_token = strcspn (farg, " \t\r\n#\'\""); - str_index += next_token; - } - - /* truncate farg at the delimiter and store it for further check */ - delimiter = *str_index, *str_index++ = '\0'; - - /* everything but comment is illegal at the end of line */ - if (delimiter != '\0' && delimiter != '#') - { - str_index += strspn(str_index, " \t\r\n"); - if (*str_index != '\0' && *str_index != '#') - { - fprintf - (stderr, - "%s:%s:%d: malformed string in configuration file\n", - CMDLINE_PARSER_PACKAGE, filename, line_num); - result = EXIT_FAILURE; - break; - } - } - - noarg: - if (!strcmp(fopt,"include")) { - if (farg && *farg) { - result = _cmdline_parser_configfile(farg, my_argc); - } else { - fprintf(stderr, "%s:%s:%d: include requires a filename argument.\n", - CMDLINE_PARSER_PACKAGE, filename, line_num); - } - continue; - } - len = strlen(fopt); - strcat (my_argv, len > 1 ? 
"--" : "-"); - strcat (my_argv, fopt); - if (len > 1 && ((farg && *farg) || equal)) - strcat (my_argv, "="); - if (farg && *farg) - strcat (my_argv, farg); - ++(*my_argc); - - cmd_line_list_tmp = (struct line_list *) malloc (sizeof (struct line_list)); - cmd_line_list_tmp->next = cmd_line_list; - cmd_line_list = cmd_line_list_tmp; - cmd_line_list->string_arg = gengetopt_strdup(my_argv); - } /* while */ - - if (file) - fclose(file); - return result; -} - -int -cmdline_parser_configfile ( - const char *filename, - struct gengetopt_args_info *args_info, - int override, int initialize, int check_required) -{ - struct cmdline_parser_params params; - - params.override = override; - params.initialize = initialize; - params.check_required = check_required; - params.check_ambiguity = 0; - params.print_errors = 1; - - return cmdline_parser_config_file (filename, args_info, ¶ms); -} - -int -cmdline_parser_config_file (const char *filename, - struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params) -{ - int i, result; - int my_argc = 1; - char **my_argv_arg; - char *additional_error; - - /* store the program name */ - cmd_line_list_tmp = (struct line_list *) malloc (sizeof (struct line_list)); - cmd_line_list_tmp->next = cmd_line_list; - cmd_line_list = cmd_line_list_tmp; - cmd_line_list->string_arg = gengetopt_strdup (CMDLINE_PARSER_PACKAGE); - - result = _cmdline_parser_configfile(filename, &my_argc); - - if (result != EXIT_FAILURE) { - my_argv_arg = (char **) malloc((my_argc+1) * sizeof(char *)); - cmd_line_list_tmp = cmd_line_list; - - for (i = my_argc - 1; i >= 0; --i) { - my_argv_arg[i] = cmd_line_list_tmp->string_arg; - cmd_line_list_tmp = cmd_line_list_tmp->next; - } - - my_argv_arg[my_argc] = 0; - - additional_error = (char *)malloc(strlen(filename) + strlen(ADDITIONAL_ERROR) + 1); - strcpy (additional_error, ADDITIONAL_ERROR); - strcat (additional_error, filename); - result = - cmdline_parser_internal (my_argc, my_argv_arg, args_info, - params, 
- additional_error); - - free (additional_error); - free (my_argv_arg); - } - - free_cmd_list(); - if (result == EXIT_FAILURE) - { - cmdline_parser_free (args_info); - exit (EXIT_FAILURE); - } - - return result; -} diff --git a/src/dgp/cmdline.d b/src/dgp/cmdline.d deleted file mode 100644 index 620fb6c..0000000 --- a/src/dgp/cmdline.d +++ /dev/null @@ -1 +0,0 @@ -cmdline.o cmdline.d : cmdline.cc cmdline.h diff --git a/src/dgp/cmdline.ggo b/src/dgp/cmdline.ggo deleted file mode 100644 index 9caa35c..0000000 --- a/src/dgp/cmdline.ggo +++ /dev/null @@ -1,52 +0,0 @@ -package "dgp" -version "0.1" - -option "grammar" g "Grammar file" - string no typestr="filename" - -option "long" l "Long output" - flag off - -option "debug" d "Debug mode." - flag off - -option "time" - "Print parse time." - flag off - -option "info" - "Print info. - h - heads d - dependents - s - sets - c - constraints n - node/arc counts" -string no default="h" -#section "Common UTT options" - - -option "input" f "Input file" string no - -option "output" o "Output file" string no - -option "only-fail" - "Print only segments the program failed to process" flag off hidden - -option "no-fail" - "Print only segments the program processed" flag off hidden - -option "copy" c "Copy succesfully processed segments to output" flag off - -option "process" p "Process segments of this type only" string no multiple - -option "select" s "Select only segments containing this field" string no multiple - -option "ignore" S "Select only segments, which doesn't contain this field" string no multiple - -option "output-field" O "Output field name (default: program name)" string no - -option "input-field" I "Input field name (default: the FORM field)" string no multiple - -option "interactive" i "Toggle interactive mode" flag off - -option "config" - "Configuration file" string typestr="FILENAME" no - -option "one-field" 1 "Print all alternative results in one field (creates compact ambiguous annotation)" flag off - -option 
"one-line" - "Print annotation alternatives as additional fields in the same segment" flag off - -option "language" - "Language." string no diff --git a/src/dgp/cmdline.h b/src/dgp/cmdline.h deleted file mode 100644 index 2f505cc..0000000 --- a/src/dgp/cmdline.h +++ /dev/null @@ -1,294 +0,0 @@ -/** @file cmdline.h - * @brief The header file for the command line option parser - * generated by GNU Gengetopt version 2.22.6 - * http://www.gnu.org/software/gengetopt. - * DO NOT modify this file, since it can be overwritten - * @author GNU Gengetopt by Lorenzo Bettini */ - -#ifndef CMDLINE_H -#define CMDLINE_H - -/* If we use autoconf. */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include /* for FILE */ - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#ifndef CMDLINE_PARSER_PACKAGE -/** @brief the program name (used for printing errors) */ -#define CMDLINE_PARSER_PACKAGE "dgp" -#endif - -#ifndef CMDLINE_PARSER_PACKAGE_NAME -/** @brief the complete program name (used for help and version) */ -#define CMDLINE_PARSER_PACKAGE_NAME "dgp" -#endif - -#ifndef CMDLINE_PARSER_VERSION -/** @brief the program version */ -#define CMDLINE_PARSER_VERSION "0.1" -#endif - -/** @brief Where the command line options are stored */ -struct gengetopt_args_info -{ - const char *help_help; /**< @brief Print help and exit help description. */ - const char *full_help_help; /**< @brief Print help, including hidden options, and exit help description. */ - const char *version_help; /**< @brief Print version and exit help description. */ - char * grammar_arg; /**< @brief Grammar file. */ - char * grammar_orig; /**< @brief Grammar file original value given at command line. */ - const char *grammar_help; /**< @brief Grammar file help description. */ - int long_flag; /**< @brief Long output (default=off). */ - const char *long_help; /**< @brief Long output help description. */ - int debug_flag; /**< @brief Debug mode. (default=off). 
*/ - const char *debug_help; /**< @brief Debug mode. help description. */ - int time_flag; /**< @brief Print parse time. (default=off). */ - const char *time_help; /**< @brief Print parse time. help description. */ - char * info_arg; /**< @brief Print info. - h - heads d - dependents - s - sets - c - constraints n - node/arc counts (default='h'). */ - char * info_orig; /**< @brief Print info. - h - heads d - dependents - s - sets - c - constraints n - node/arc counts original value given at command line. */ - const char *info_help; /**< @brief Print info. - h - heads d - dependents - s - sets - c - constraints n - node/arc counts help description. */ - char * input_arg; /**< @brief Input file. */ - char * input_orig; /**< @brief Input file original value given at command line. */ - const char *input_help; /**< @brief Input file help description. */ - char * output_arg; /**< @brief Output file. */ - char * output_orig; /**< @brief Output file original value given at command line. */ - const char *output_help; /**< @brief Output file help description. */ - int only_fail_flag; /**< @brief Print only segments the program failed to process (default=off). */ - const char *only_fail_help; /**< @brief Print only segments the program failed to process help description. */ - int no_fail_flag; /**< @brief Print only segments the program processed (default=off). */ - const char *no_fail_help; /**< @brief Print only segments the program processed help description. */ - int copy_flag; /**< @brief Copy succesfully processed segments to output (default=off). */ - const char *copy_help; /**< @brief Copy succesfully processed segments to output help description. */ - char ** process_arg; /**< @brief Process segments of this type only. */ - char ** process_orig; /**< @brief Process segments of this type only original value given at command line. 
*/ - unsigned int process_min; /**< @brief Process segments of this type only's minimum occurreces */ - unsigned int process_max; /**< @brief Process segments of this type only's maximum occurreces */ - const char *process_help; /**< @brief Process segments of this type only help description. */ - char ** select_arg; /**< @brief Select only segments containing this field. */ - char ** select_orig; /**< @brief Select only segments containing this field original value given at command line. */ - unsigned int select_min; /**< @brief Select only segments containing this field's minimum occurreces */ - unsigned int select_max; /**< @brief Select only segments containing this field's maximum occurreces */ - const char *select_help; /**< @brief Select only segments containing this field help description. */ - char ** ignore_arg; /**< @brief Select only segments, which doesn't contain this field. */ - char ** ignore_orig; /**< @brief Select only segments, which doesn't contain this field original value given at command line. */ - unsigned int ignore_min; /**< @brief Select only segments, which doesn't contain this field's minimum occurreces */ - unsigned int ignore_max; /**< @brief Select only segments, which doesn't contain this field's maximum occurreces */ - const char *ignore_help; /**< @brief Select only segments, which doesn't contain this field help description. */ - char * output_field_arg; /**< @brief Output field name (default: program name). */ - char * output_field_orig; /**< @brief Output field name (default: program name) original value given at command line. */ - const char *output_field_help; /**< @brief Output field name (default: program name) help description. */ - char ** input_field_arg; /**< @brief Input field name (default: the FORM field). */ - char ** input_field_orig; /**< @brief Input field name (default: the FORM field) original value given at command line. 
*/ - unsigned int input_field_min; /**< @brief Input field name (default: the FORM field)'s minimum occurreces */ - unsigned int input_field_max; /**< @brief Input field name (default: the FORM field)'s maximum occurreces */ - const char *input_field_help; /**< @brief Input field name (default: the FORM field) help description. */ - int interactive_flag; /**< @brief Toggle interactive mode (default=off). */ - const char *interactive_help; /**< @brief Toggle interactive mode help description. */ - char * config_arg; /**< @brief Configuration file. */ - char * config_orig; /**< @brief Configuration file original value given at command line. */ - const char *config_help; /**< @brief Configuration file help description. */ - int one_field_flag; /**< @brief Print all alternative results in one field (creates compact ambiguous annotation) (default=off). */ - const char *one_field_help; /**< @brief Print all alternative results in one field (creates compact ambiguous annotation) help description. */ - int one_line_flag; /**< @brief Print annotation alternatives as additional fields in the same segment (default=off). */ - const char *one_line_help; /**< @brief Print annotation alternatives as additional fields in the same segment help description. */ - char * language_arg; /**< @brief Language.. */ - char * language_orig; /**< @brief Language. original value given at command line. */ - const char *language_help; /**< @brief Language. help description. */ - - unsigned int help_given ; /**< @brief Whether help was given. */ - unsigned int full_help_given ; /**< @brief Whether full-help was given. */ - unsigned int version_given ; /**< @brief Whether version was given. */ - unsigned int grammar_given ; /**< @brief Whether grammar was given. */ - unsigned int long_given ; /**< @brief Whether long was given. */ - unsigned int debug_given ; /**< @brief Whether debug was given. */ - unsigned int time_given ; /**< @brief Whether time was given. 
*/ - unsigned int info_given ; /**< @brief Whether info was given. */ - unsigned int input_given ; /**< @brief Whether input was given. */ - unsigned int output_given ; /**< @brief Whether output was given. */ - unsigned int only_fail_given ; /**< @brief Whether only-fail was given. */ - unsigned int no_fail_given ; /**< @brief Whether no-fail was given. */ - unsigned int copy_given ; /**< @brief Whether copy was given. */ - unsigned int process_given ; /**< @brief Whether process was given. */ - unsigned int select_given ; /**< @brief Whether select was given. */ - unsigned int ignore_given ; /**< @brief Whether ignore was given. */ - unsigned int output_field_given ; /**< @brief Whether output-field was given. */ - unsigned int input_field_given ; /**< @brief Whether input-field was given. */ - unsigned int interactive_given ; /**< @brief Whether interactive was given. */ - unsigned int config_given ; /**< @brief Whether config was given. */ - unsigned int one_field_given ; /**< @brief Whether one-field was given. */ - unsigned int one_line_given ; /**< @brief Whether one-line was given. */ - unsigned int language_given ; /**< @brief Whether language was given. 
*/ - -} ; - -/** @brief The additional parameters to pass to parser functions */ -struct cmdline_parser_params -{ - int override; /**< @brief whether to override possibly already present options (default 0) */ - int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */ - int check_required; /**< @brief whether to check that all required options were provided (default 1) */ - int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */ - int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */ -} ; - -/** @brief the purpose string of the program */ -extern const char *gengetopt_args_info_purpose; -/** @brief the usage string of the program */ -extern const char *gengetopt_args_info_usage; -/** @brief the description string of the program */ -extern const char *gengetopt_args_info_description; -/** @brief all the lines making the help output */ -extern const char *gengetopt_args_info_help[]; -/** @brief all the lines making the full help output (including hidden options) */ -extern const char *gengetopt_args_info_full_help[]; - -/** - * The command line parser - * @param argc the number of command line options - * @param argv the command line options - * @param args_info the structure where option information will be stored - * @return 0 if everything went fine, NON 0 if an error took place - */ -int cmdline_parser (int argc, char **argv, - struct gengetopt_args_info *args_info); - -/** - * The command line parser (version with additional parameters - deprecated) - * @param argc the number of command line options - * @param argv the command line options - * @param args_info the structure where option information will be stored - * @param override whether to override possibly already present options - * @param initialize whether to initialize the option structure my_args_info - * @param 
check_required whether to check that all required options were provided - * @return 0 if everything went fine, NON 0 if an error took place - * @deprecated use cmdline_parser_ext() instead - */ -int cmdline_parser2 (int argc, char **argv, - struct gengetopt_args_info *args_info, - int override, int initialize, int check_required); - -/** - * The command line parser (version with additional parameters) - * @param argc the number of command line options - * @param argv the command line options - * @param args_info the structure where option information will be stored - * @param params additional parameters for the parser - * @return 0 if everything went fine, NON 0 if an error took place - */ -int cmdline_parser_ext (int argc, char **argv, - struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params); - -/** - * Save the contents of the option struct into an already open FILE stream. - * @param outfile the stream where to dump options - * @param args_info the option struct to dump - * @return 0 if everything went fine, NON 0 if an error took place - */ -int cmdline_parser_dump(FILE *outfile, - struct gengetopt_args_info *args_info); - -/** - * Save the contents of the option struct into a (text) file. 
- * This file can be read by the config file parser (if generated by gengetopt) - * @param filename the file where to save - * @param args_info the option struct to save - * @return 0 if everything went fine, NON 0 if an error took place - */ -int cmdline_parser_file_save(const char *filename, - struct gengetopt_args_info *args_info); - -/** - * Print the help - */ -void cmdline_parser_print_help(void); -/** - * Print the full help (including hidden options) - */ -void cmdline_parser_print_full_help(void); -/** - * Print the version - */ -void cmdline_parser_print_version(void); - -/** - * Initializes all the fields a cmdline_parser_params structure - * to their default values - * @param params the structure to initialize - */ -void cmdline_parser_params_init(struct cmdline_parser_params *params); - -/** - * Allocates dynamically a cmdline_parser_params structure and initializes - * all its fields to their default values - * @return the created and initialized cmdline_parser_params structure - */ -struct cmdline_parser_params *cmdline_parser_params_create(void); - -/** - * Initializes the passed gengetopt_args_info structure's fields - * (also set default values for options that have a default) - * @param args_info the structure to initialize - */ -void cmdline_parser_init (struct gengetopt_args_info *args_info); -/** - * Deallocates the string fields of the gengetopt_args_info structure - * (but does not deallocate the structure itself) - * @param args_info the structure to deallocate - */ -void cmdline_parser_free (struct gengetopt_args_info *args_info); - -/** - * The config file parser (deprecated version) - * @param filename the name of the config file - * @param args_info the structure where option information will be stored - * @param override whether to override possibly already present options - * @param initialize whether to initialize the option structure my_args_info - * @param check_required whether to check that all required options were provided - * 
@return 0 if everything went fine, NON 0 if an error took place - * @deprecated use cmdline_parser_config_file() instead - */ -int cmdline_parser_configfile (const char *filename, - struct gengetopt_args_info *args_info, - int override, int initialize, int check_required); - -/** - * The config file parser - * @param filename the name of the config file - * @param args_info the structure where option information will be stored - * @param params additional parameters for the parser - * @return 0 if everything went fine, NON 0 if an error took place - */ -int cmdline_parser_config_file (const char *filename, - struct gengetopt_args_info *args_info, - struct cmdline_parser_params *params); - -/** - * Checks that all the required options were specified - * @param args_info the structure to check - * @param prog_name the name of the program that will be used to print - * possible errors - * @return - */ -int cmdline_parser_required (struct gengetopt_args_info *args_info, - const char *prog_name); - - -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* CMDLINE_H */ diff --git a/src/dgp/cmdline.o b/src/dgp/cmdline.o deleted file mode 100644 index d495a14..0000000 Binary files a/src/dgp/cmdline.o and /dev/null differ diff --git a/src/dgp/dgp b/src/dgp/dgp deleted file mode 100755 index 6df0a54..0000000 Binary files a/src/dgp/dgp and /dev/null differ diff --git a/src/dgp/dgp1.d b/src/dgp/dgp1.d deleted file mode 100644 index 20dac4f..0000000 --- a/src/dgp/dgp1.d +++ /dev/null @@ -1,3 +0,0 @@ -dgp1.o dgp1.d : dgp1.cc dgp1.hh grammar.hh const.hh thesymbols.hh symbol.hh \ - sgraph.hh mgraph.hh ../common/common.h ../common/../lib/const.h \ - ../common/../dgp/cmdline.h boubble.hh global.hh diff --git a/src/dgp/dgp1.o b/src/dgp/dgp1.o deleted file mode 100644 index 996c956..0000000 Binary files a/src/dgp/dgp1.o and /dev/null differ diff --git a/src/dgp/global.d b/src/dgp/global.d deleted file mode 100644 index a6cb932..0000000 --- a/src/dgp/global.d +++ /dev/null @@ -1 
+0,0 @@ -global.o global.d : global.cc global.hh diff --git a/src/dgp/global.o b/src/dgp/global.o deleted file mode 100644 index f2adbce..0000000 Binary files a/src/dgp/global.o and /dev/null differ diff --git a/src/dgp/grammar.d b/src/dgp/grammar.d deleted file mode 100644 index 320d55b..0000000 --- a/src/dgp/grammar.d +++ /dev/null @@ -1,3 +0,0 @@ -grammar.o grammar.d : grammar.cc grammar.hh const.hh thesymbols.hh symbol.hh \ - sgraph.hh mgraph.hh ../common/common.h ../common/../lib/const.h \ - ../common/../dgp/cmdline.h boubble.hh global.hh diff --git a/src/dgp/grammar.o b/src/dgp/grammar.o deleted file mode 100644 index 7542e5d..0000000 Binary files a/src/dgp/grammar.o and /dev/null differ diff --git a/src/dgp/main.d b/src/dgp/main.d deleted file mode 100644 index 7fc308a..0000000 --- a/src/dgp/main.d +++ /dev/null @@ -1,3 +0,0 @@ -main.o main.d : main.cc global.hh mgraph.hh const.hh thesymbols.hh symbol.hh \ - ../common/common.h ../common/../lib/const.h ../common/../dgp/cmdline.h \ - sgraph.hh boubble.hh grammar.hh dgp1.hh cmdline.h diff --git a/src/dgp/main.o b/src/dgp/main.o deleted file mode 100644 index c55733e..0000000 Binary files a/src/dgp/main.o and /dev/null differ diff --git a/src/dgp/mgraph.d b/src/dgp/mgraph.d deleted file mode 100644 index 2f0742d..0000000 --- a/src/dgp/mgraph.d +++ /dev/null @@ -1,2 +0,0 @@ -mgraph.o mgraph.d : mgraph.cc mgraph.hh const.hh thesymbols.hh symbol.hh \ - ../common/common.h ../common/../lib/const.h ../common/../dgp/cmdline.h diff --git a/src/dgp/mgraph.o b/src/dgp/mgraph.o deleted file mode 100644 index e34d68b..0000000 Binary files a/src/dgp/mgraph.o and /dev/null differ diff --git a/src/dgp/sgraph.d b/src/dgp/sgraph.d deleted file mode 100644 index a1f23b3..0000000 --- a/src/dgp/sgraph.d +++ /dev/null @@ -1,3 +0,0 @@ -sgraph.o sgraph.d : sgraph.cc sgraph.hh const.hh mgraph.hh thesymbols.hh symbol.hh \ - ../common/common.h ../common/../lib/const.h ../common/../dgp/cmdline.h \ - boubble.hh global.hh grammar.hh diff 
--git a/src/dgp/sgraph.o b/src/dgp/sgraph.o deleted file mode 100644 index 59c70fa..0000000 Binary files a/src/dgp/sgraph.o and /dev/null differ diff --git a/src/dgp/symbol.d b/src/dgp/symbol.d deleted file mode 100644 index 9ecd7ff..0000000 --- a/src/dgp/symbol.d +++ /dev/null @@ -1 +0,0 @@ -symbol.o symbol.d : symbol.cc symbol.hh diff --git a/src/dgp/symbol.o b/src/dgp/symbol.o deleted file mode 100644 index 16b51d7..0000000 Binary files a/src/dgp/symbol.o and /dev/null differ diff --git a/src/grp/Makefile b/src/grp/Makefile index 4193550..be6a80b 100644 --- a/src/grp/Makefile +++ b/src/grp/Makefile @@ -6,12 +6,14 @@ grp: install: ifdef BIN_DIR install -m 0755 grp $(BIN_DIR) + install -m 0755 ugrp $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/grp + rm $(BIN_DIR)/ugrp endif clean: diff --git a/src/grp/grp b/src/grp/grp index c1e8236..6c50048 100755 --- a/src/grp/grp +++ b/src/grp/grp @@ -156,6 +156,7 @@ $grepre =~ s/\\W/[^a-z # extensions $grepre =~ s/\\l/[a-z±æê³ñ󶼿]/g; #lowercase letter $grepre =~ s/\\L/[A-Z¡ÆÊ£ÑÓ¦¬¯]/g; #upercase letter +$grepre =~ s/`,'/,/g; my $grep_command = ($action =~ /g/) ? 
"egrep '$grepre'" : " cat "; diff --git a/src/grp/ugrp b/src/grp/ugrp new file mode 100755 index 0000000..40dceb6 --- /dev/null +++ b/src/grp/ugrp @@ -0,0 +1,11 @@ +case $LANG in + pl_PL.UTF-8 ) ARGS='' + for a in "$@" + do + ARG=$(printf '%s' $a | recode -f utf8..l2) + ARGS="$ARGS $ARG" + done + recode -f utf8..l2 | LANG=pl_PL grp $ARGS | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) grp $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac diff --git a/src/kon/Makefile b/src/kon/Makefile index 7bc382d..8a83fae 100644 --- a/src/kon/Makefile +++ b/src/kon/Makefile @@ -6,12 +6,14 @@ kon: install: ifdef BIN_DIR install -m 0755 kon $(BIN_DIR) + install -m 0755 ukon $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/kon + rm $(BIN_DIR)/ukon endif clean: diff --git a/src/kon/ukon b/src/kon/ukon new file mode 100755 index 0000000..c7b1b73 --- /dev/null +++ b/src/kon/ukon @@ -0,0 +1,7 @@ +case $LANG in + pl_PL.UTF-8 ) recode -f utf8..l2 | LANG=pl_PL kon $@ | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) kon $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac + + diff --git a/src/kot/Makefile b/src/kot/Makefile index 653742a..c758ace 100644 --- a/src/kot/Makefile +++ b/src/kot/Makefile @@ -6,12 +6,14 @@ kot: install: ifdef BIN_DIR install -m 0755 kot $(BIN_DIR) + install -m 0755 ukot $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/kot + rm $(BIN_DIR)/ukot endif clean: diff --git a/src/kot/kot b/src/kot/kot index f4b38f3..d99210a 100755 --- a/src/kot/kot +++ b/src/kot/kot @@ -51,15 +51,11 @@ GetOptions("gap-fill|g=s" => \$gap_fill, if($help) { print <<'END' -Usage: ser [OPTIONS] [file ..] +Usage: kot [OPTIONS] [file ..] Options: --gap-fill -g Help. --spaces -r - --define=FILE Read macrodefinitions from FILE. - --flex-template=FILE Read flex code template from FILE. - --only-matching -m Print only fragments matching PATTERN. 
- --flex Print only the generated flex code and exit. END ; exit 0; @@ -76,17 +72,26 @@ my $count=0; while(<>) { - my ($start,$len,$type,$form) = /^\s*(\d+)\s+(\d+)\s+(\S+)\s+(\S+)/; + my ($start,$len) = /^\s*(\d+)\s+(\d+)/; + my ($type,$form) = /^(?:\d|\s)*(\S+)\s+(\S+)/; - if($start > $prevend) + if ($start && $len) { - print $gap_fill unless $count++ == 0; + if($start > $prevend) + { + print $gap_fill unless $count++ == 0; + } + + $prevend=$start+$len; + + next if $len==0;# || $form eq "*"; } - - $prevend=$start+$len; - - next if $len==0;# || $form eq "*"; - + else + { + next if $form eq "*"; + $prevend = -1; + } + $form =~ s/\\\*/*/g; if($type eq 'S' && ! $spaces) diff --git a/src/kot/ukot b/src/kot/ukot new file mode 100755 index 0000000..a183e44 --- /dev/null +++ b/src/kot/ukot @@ -0,0 +1,5 @@ +case $LANG in + pl_PL.UTF-8 ) recode -f utf8..l2 | LANG=pl_PL kot $@ | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) kot $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac diff --git a/src/lem/Makefile b/src/lem/Makefile index b3a0a98..a72705f 100644 --- a/src/lem/Makefile +++ b/src/lem/Makefile @@ -1,5 +1,7 @@ include ../../config.mak +export LANG=pl_PL + LDFLAGS += -static CXXFLAGS += -O2 -fpermissive @@ -45,10 +47,12 @@ clean.cmdline: install: ifdef BIN_DIR install -m 0755 lem $(BIN_DIR) + install -m 0755 ulem $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/lem + rm $(BIN_DIR)/ulem endif diff --git a/src/lem/ulem b/src/lem/ulem new file mode 100755 index 0000000..33a6858 --- /dev/null +++ b/src/lem/ulem @@ -0,0 +1,7 @@ +case $LANG in + pl_PL.UTF-8 ) recode -f utf8..l2 | LANG=pl_PL lem $@ | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) lem $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac + + diff --git a/src/rm12/rm12 b/src/rm12/rm12 index d4c83fc..d6d80ef 100755 --- a/src/rm12/rm12 +++ b/src/rm12/rm12 @@ -6,5 +6,4 @@ #author: Tomasz Obrebski -/[0-9]+[ 
\t]+[0-9]+[ \t]+BOS/! -s/[0-9]+[ \t]+[0-9]+[ \t]// \ No newline at end of file +/[0-9]+[ \t]+[0-9]+[ \t]+BOS/! s/[0-9]+[ \t]+[0-9]+[ \t]// \ No newline at end of file diff --git a/src/sen/Makefile b/src/sen/Makefile index 96df701..943b23c 100644 --- a/src/sen/Makefile +++ b/src/sen/Makefile @@ -1,5 +1,7 @@ include ../../config.mak +export LANG=pl_PL + ifeq ($(BUILD_STATIC), yes) LDFLAGS += -static endif @@ -17,12 +19,14 @@ lex.yy.c: sen.l install: ifdef BIN_DIR install -m 0755 sen $(BIN_DIR) + install -m 0755 usen $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/sen + rm $(BIN_DIR)/usen endif clean: clean.flex diff --git a/src/sen/usen b/src/sen/usen new file mode 100755 index 0000000..27c5abf --- /dev/null +++ b/src/sen/usen @@ -0,0 +1,5 @@ +case $LANG in + pl_PL.UTF-8 ) recode -f utf8..l2 | LANG=pl_PL sen $@ | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) sen $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac diff --git a/src/ser/Makefile b/src/ser/Makefile index 7fa4704..05d0370 100644 --- a/src/ser/Makefile +++ b/src/ser/Makefile @@ -6,12 +6,14 @@ ser: install: ifdef BIN_DIR install -m 0755 ser $(BIN_DIR) + install -m 0755 user $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/ser + rm $(BIN_DIR)/user endif clean: diff --git a/src/ser/user b/src/ser/user new file mode 100755 index 0000000..ec9d84c --- /dev/null +++ b/src/ser/user @@ -0,0 +1,15 @@ +ser $@ + +# SAFER BUT SLOWER: +# +# case $LANG in +# pl_PL.UTF-8 ) ARGS='' +# for a in "$@" +# do +# ARG=$(printf '%s' $a | recode -f utf8..l2) +# ARGS="$ARGS $ARG" +# done +# recode -f utf8..l2 | LANG=pl_PL ser $ARGS | recode -f l2..utf8;; +# pl_PL|pl_PL.ISO-8859-2 ) ser $@ ;; +# * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +# esac diff --git a/src/tok.c/Makefile b/src/tok.c/Makefile index 53c1673..9a5e7b3 100644 --- a/src/tok.c/Makefile +++ b/src/tok.c/Makefile @@ -1,5 +1,7 @@ include 
../../config.mak +export LANG=pl_PL + ifeq ($(BUILD_STATIC), yes) LDFLAGS += -static endif @@ -35,12 +37,14 @@ cmdline.c cmdline.h: cmdline.ggo install: ifdef BIN_DIR install -m 0755 tok_c $(BIN_DIR) + install -m 0755 utok $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/tok_c + rm $(BIN_DIR)/utok endif clean: clean.cmdline diff --git a/src/tok.c/utok b/src/tok.c/utok new file mode 100755 index 0000000..94c2eef --- /dev/null +++ b/src/tok.c/utok @@ -0,0 +1,5 @@ +case $LANG in + pl_PL.UTF-8 ) recode -f utf8..l2 | LANG=pl_PL tok $@ | recode -f l2..utf8;; + pl_PL|pl_PL.ISO-8859-2 ) tok $@ ;; + * ) echo "LANG variable must be set pl_PL.UTF-8, pl_PL, or pl_PL.ISO-8859-2";; +esac