diff --git a/Makefile b/Makefile index 97272fe..1a1772f 100644 --- a/Makefile +++ b/Makefile @@ -40,7 +40,7 @@ share: # cleanup section # ------------------------------------------------------------------ .PHONY: clean -clean: clean_components clean_documentation clean_config +clean: clean_components clean_documentation clean_config clean_config_mak @echo "All files cleaned successfully!" .PHONY: clean_components @@ -58,6 +58,10 @@ clean_documentation: clean_config: cd $(CUR_DIR)/conf && make clean; cd $(CUR_DIR) +.PHONY: clean_config_mak +clean_config_mak: + rm config.mak || true + # ------------------------------------------------------------------ # install section # ------------------------------------------------------------------ diff --git a/configure b/configure index c5513b0..bd20ced 100755 --- a/configure +++ b/configure @@ -8,7 +8,7 @@ CUR_DIR=$(pwd) SRC_DIR="${CUR_DIR}/src" # list of components to compile -CMPLIST="compiledic cor dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen sen-nl ser tags tok.c tok.l tok.pl unfla" +CMPLIST="compdic cor dgc dgp fla gph grp gue kon kor kot lem mar rm12 rs12 sen sen-nl ser tags tok.c tok.l tok.pl tre unfla" COMP= diff --git a/share/cats.dgc b/share/cats.dgc index fb800be..9248e00 100644 --- a/share/cats.dgc +++ b/share/cats.dgc @@ -1,499 +1,330 @@ -ADJ/DcNpCaGp -ADJ/DcNpCd -ADJ/DcNpCgl -ADJ/DcNpCi -ADJ/DcNpCnavGaifn -ADJ/DcNpCnvGp -ADJ/DcNsCaGi -ADJ/DcNsCaGpa -ADJ/DcNsCaiGf -ADJ/DcNsCavGf -ADJ/DcNsCdGpain -ADJ/DcNsCgdlGf -ADJ/DcNsCgGpain -ADJ/DcNsCilGpain -ADJ/DcNsCnavGn -ADJ/DcNsCnvGpai -ADJ/DpNpCaGp -ADJ/DpNpCd -ADJ/DpNpCgl -ADJ/DpNpCi -ADJ/DpNpCnavGaifn -ADJ/DpNpCnvGp -ADJ/DpNsCaGi -ADJ/DpNsCaGpa -ADJ/DpNsCaiGf -ADJ/DpNsCavGf -ADJ/DpNsCdGpain -ADJ/DpNsCgdlGf -ADJ/DpNsCgGpain -ADJ/DpNsCilGpain -ADJ/DpNsCnavGn -ADJ/DpNsCnvGpai -ADJ/DsNpCaGp -ADJ/DsNpCd -ADJ/DsNpCgl -ADJ/DsNpCi -ADJ/DsNpCnavGaifn -ADJ/DsNpCnvGp -ADJ/DsNsCaGi -ADJ/DsNsCaGpa -ADJ/DsNsCaiGf -ADJ/DsNsCavGf -ADJ/DsNsCdGpain -ADJ/DsNsCgdlGf -ADJ/DsNsCgGpain -ADJ/DsNsCilGpain -ADJ/DsNsCnavGn -ADJ/DsNsCnvGpai -ADJNUM/NpCaGp -ADJNUM/NpCd -ADJNUM/NpCgl -ADJNUM/NpCi -ADJNUM/NpCnavGaifn -ADJNUM/NpCnvGp -ADJNUM/NsCaGi -ADJNUM/NsCaGpa -ADJNUM/NsCaiGf -ADJNUM/NsCavGf -ADJNUM/NsCdGpain -ADJNUM/NsCgdlGf -ADJNUM/NsCgGpain -ADJNUM/NsCilGpain -ADJNUM/NsCnavGn -ADJNUM/NsCnvGpai -ADJPAP/NpCaGp -ADJPAP/NpCd -ADJPAP/NpCgl -ADJPAP/NpCi -ADJPAP/NpCnavGaifn -ADJPAP/NpCnvGp -ADJPAP/NsCaGi -ADJPAP/NsCaGpa -ADJPAP/NsCaiGf -ADJPAP/NsCavGf -ADJPAP/NsCdGpain -ADJPAP/NsCgdlGf -ADJPAP/NsCgGpain -ADJPAP/NsCilGpain -ADJPAP/NsCnavGn -ADJPAP/NsCnvGpai -ADJPP/NpCaGp -ADJPP/NpCd -ADJPP/NpCgl -ADJPP/NpCi -ADJPP/NpCnavGaifn -ADJPP/NpCnvGp -ADJPP/NsCaGi -ADJPP/NsCaGpa -ADJPP/NsCaiGf -ADJPP/NsCavGf -ADJPP/NsCdGpain -ADJPP/NsCgdlGf -ADJPP/NsCgGpain -ADJPP/NsCilGpain -ADJPP/NsCnavGn -ADJPP/NsCnvGpai -ADJPRO/NpCaGp -ADJPRO/NpCd -ADJPRO/NpCgl -ADJPRO/NpCi -ADJPRO/NpCnavGaifn -ADJPRO/NpCnvGp -ADJPRO/NsCaGi -ADJPRO/NsCaGpa -ADJPRO/NsCaiGf -ADJPRO/NsCavGf -ADJPRO/NsCdGpain -ADJPRO/NsCgdlGf -ADJPRO/NsCgGpain -ADJPRO/NsCilGpain -ADJPRO/NsCnavGn -ADJPRO/NsCnvGpai -ADJPRO/ZdNpCaGp -ADJPRO/ZdNpCd -ADJPRO/ZdNpCgl -ADJPRO/ZdNpCi -ADJPRO/ZdNpCnavGaifn -ADJPRO/ZdNpCnvGp -ADJPRO/ZdNsCaGi -ADJPRO/ZdNsCaGpa -ADJPRO/ZdNsCaiGf -ADJPRO/ZdNsCavGf -ADJPRO/ZdNsCdGpain -ADJPRO/ZdNsCgdlGf -ADJPRO/ZdNsCgGpain -ADJPRO/ZdNsCilGpain -ADJPRO/ZdNsCnavGn -ADJPRO/ZdNsCnvGpai -ADJPRO/ZgNpCaGp -ADJPRO/ZgNpCd -ADJPRO/ZgNpCgl -ADJPRO/ZgNpCi -ADJPRO/ZgNpCnavGaifn -ADJPRO/ZgNpCnvGp -ADJPRO/ZgNsCaGi -ADJPRO/ZgNsCaGpa -ADJPRO/ZgNsCaiGf -ADJPRO/ZgNsCavGf -ADJPRO/ZgNsCdGpain -ADJPRO/ZgNsCgdlGf -ADJPRO/ZgNsCgGpain -ADJPRO/ZgNsCilGpain -ADJPRO/ZgNsCnavGn -ADJPRO/ZgNsCnvGpai -ADJPRO/ZiNpCaGp -ADJPRO/ZiNpCd -ADJPRO/ZiNpCgl -ADJPRO/ZiNpCi -ADJPRO/ZiNpCnavGaifn -ADJPRO/ZiNpCnvGp -ADJPRO/ZiNsCaGi -ADJPRO/ZiNsCaGpa -ADJPRO/ZiNsCaiGf -ADJPRO/ZiNsCavGf -ADJPRO/ZiNsCdGpain -ADJPRO/ZiNsCgdlGf -ADJPRO/ZiNsCgGpain -ADJPRO/ZiNsCilGpain -ADJPRO/ZiNsCnavGn -ADJPRO/ZiNsCnvGpai -ADJPRO/ZnNpCaGp -ADJPRO/ZnNpCd -ADJPRO/ZnNpCgl -ADJPRO/ZnNpCi -ADJPRO/ZnNpCnavGaifn -ADJPRO/ZnNpCnvGp -ADJPRO/ZnNsCaGi -ADJPRO/ZnNsCaGpa -ADJPRO/ZnNsCaiGf -ADJPRO/ZnNsCavGf -ADJPRO/ZnNsCdGpain -ADJPRO/ZnNsCgdlGf -ADJPRO/ZnNsCgGpain -ADJPRO/ZnNsCilGpain -ADJPRO/ZnNsCnavGn -ADJPRO/ZnNsCnvGpai -ADJPRO/ZqNpCaGp -ADJPRO/ZqNpCd -ADJPRO/ZqNpCgl -ADJPRO/ZqNpCi -ADJPRO/ZqNpCnavGaifn -ADJPRO/ZqNpCnvGp -ADJPRO/ZqNsCaGi -ADJPRO/ZqNsCaGpa -ADJPRO/ZqNsCaiGf -ADJPRO/ZqNsCavGf -ADJPRO/ZqNsCdGpain -ADJPRO/ZqNsCgdlGf -ADJPRO/ZqNsCgGpain -ADJPRO/ZqNsCilGpain -ADJPRO/ZqNsCnavGn -ADJPRO/ZqNsCnvGpai -ADJPRO/ZqrNpCaGp -ADJPRO/ZqrNpCd -ADJPRO/ZqrNpCgl -ADJPRO/ZqrNpCi -ADJPRO/ZqrNpCnavGaifn -ADJPRO/ZqrNpCnvGp -ADJPRO/ZqrNsCaGi -ADJPRO/ZqrNsCaGpa -ADJPRO/ZqrNsCaiGf -ADJPRO/ZqrNsCavGf -ADJPRO/ZqrNsCdGpain -ADJPRO/ZqrNsCgdlGf -ADJPRO/ZqrNsCgGpain -ADJPRO/ZqrNsCilGpain -ADJPRO/ZqrNsCnavGn -ADJPRO/ZqrNsCnvGpai -ADJPRO/ZsNpCaGp -ADJPRO/ZsNpCd -ADJPRO/ZsNpCgl -ADJPRO/ZsNpCi -ADJPRO/ZsNpCnavGaifn -ADJPRO/ZsNpCnvGp -ADJPRO/ZsNsCaGi -ADJPRO/ZsNsCaGpa -ADJPRO/ZsNsCaiGf -ADJPRO/ZsNsCavGf -ADJPRO/ZsNsCdGpain -ADJPRO/ZsNsCgdlGf -ADJPRO/ZsNsCgGpain -ADJPRO/ZsNsCilGpain -ADJPRO/ZsNsCnavGn -ADJPRO/ZsNsCnvGpai -ADJPRP/NpCaGp -ADJPRP/NpCd -ADJPRP/NpCgl -ADJPRP/NpCi -ADJPRP/NpCnavGaifn -ADJPRP/NpCnvGp -ADJPRP/NsCaGi -ADJPRP/NsCaGpa -ADJPRP/NsCaiGf -ADJPRP/NsCavGf -ADJPRP/NsCdGpain -ADJPRP/NsCgdlGf -ADJPRP/NsCgGpain -ADJPRP/NsCilGpain -ADJPRP/NsCnavGn -ADJPRP/NsCnvGpai +ADJ/CaDcGapNs +ADJ/CaDcGiNs +ADJ/CaDcGpNp +ADJ/CaDpGapNs +ADJ/CaDpGiNs +ADJ/CaDpGpNp +ADJ/CaDsGapNs +ADJ/CaDsGiNs +ADJ/CaDsGpNp +ADJ/CaiDcGfNs +ADJ/CaiDpGfNs +ADJ/CaiDsGfNs +ADJ/CanvDcGafinNp +ADJ/CanvDcGnNs +ADJ/CanvDpGafinNp +ADJ/CanvDpGfNs +ADJ/CanvDpGnNs +ADJ/CanvDsGafinNp +ADJ/CanvDsGnNs +ADJ/CavDcGfNs +ADJ/CavDsGfNs +ADJ/CdDcGainpNs +ADJ/CdDcNp +ADJ/CdDpGainpNs +ADJ/CdDpNp +ADJ/CdDsGainpNs +ADJ/CdDsNp +ADJ/CdglDcGfNs +ADJ/CdglDpGfNs +ADJ/CdglDsGfNs +ADJ/CgDcGainpNs +ADJ/CgDpGainpNs +ADJ/CgDsGainpNs +ADJ/CglDcNp +ADJ/CglDpNp +ADJ/CglDsNp +ADJ/CiDcNp +ADJ/CiDpNp +ADJ/CiDsNp +ADJ/CilDcGainpNs +ADJ/CilDpGainpNs +ADJ/CilDsGainpNs +ADJ/CnvDcGaipNs +ADJ/CnvDcGpNp +ADJ/CnvDpGaipNs +ADJ/CnvDpGpNp +ADJ/CnvDsGaipNs +ADJ/CnvDsGpNp +ADJNUM/CaGapNs +ADJNUM/CaGiNs +ADJNUM/CaGpNp +ADJNUM/CaiGfNs +ADJNUM/CanvGafinNp +ADJNUM/CanvGnNs +ADJNUM/CdGainpNs +ADJNUM/CdglGfNs +ADJNUM/CdNp +ADJNUM/CgGainpNs +ADJNUM/CglNp +ADJNUM/CilGainpNs +ADJNUM/CiNp +ADJNUM/CnvGaipNs +ADJNUM/CnvGfNs +ADJNUM/CnvGpNp +ADJPAP/CaGapNs +ADJPAP/CaGiNs +ADJPAP/CaGpNp +ADJPAP/CaiGfNs +ADJPAP/CanvGafinNp +ADJPAP/CanvGnNs +ADJPAP/CdGainpNs +ADJPAP/CdglGfNs +ADJPAP/CdNp +ADJPAP/CgGainpNs +ADJPAP/CglNp +ADJPAP/CilGainpNs +ADJPAP/CiNp +ADJPAP/CnvGaipNs +ADJPAP/CnvGfNs +ADJPAP/CnvGpNp +ADJPP/CaGapNs +ADJPP/CaGiNs +ADJPP/CaGpNp +ADJPP/CaiGfNs +ADJPP/CanvGafinNp +ADJPP/CanvGnNs +ADJPP/CdGainpNs +ADJPP/CdglGfNs +ADJPP/CdNp +ADJPP/CgGainpNs +ADJPP/CglNp +ADJPP/CilGainpNs +ADJPP/CiNp +ADJPP/CnvGaipNs +ADJPP/CnvGfNs +ADJPP/CnvGpNp +ADJPRO/CaGapNs +ADJPRO/CaGiNs +ADJPRO/CaGpNp +ADJPRO/CaiGfNs +ADJPRO/CanvGafinNp +ADJPRO/CanvGnNs +ADJPRO/CdGainpNs +ADJPRO/CdglGfNs +ADJPRO/CdNp +ADJPRO/CgGainpNs +ADJPRO/CglNp +ADJPRO/CilGainpNs +ADJPRO/CiNp +ADJPRO/CnvGaipNs +ADJPRO/CnvGfNs +ADJPRO/CnvGpNp +ADJPRP/CaGapNs +ADJPRP/CaGiNs +ADJPRP/CaGpNp +ADJPRP/CaiGfNs +ADJPRP/CanvGafinNp +ADJPRP/CanvGnNs +ADJPRP/CdGainpNs +ADJPRP/CdglGfNs +ADJPRP/CdNp +ADJPRP/CgGainpNs +ADJPRP/CglNp +ADJPRP/CilGainpNs +ADJPRP/CiNp +ADJPRP/CnvGaipNs +ADJPRP/CnvGfNs +ADJPRP/CnvGpNp ADVANP +ADVANP/C ADV/Dc ADV/Dp ADV/Ds ADVNUM ADVPRO -ADVPRO/Zd -ADVPRO/Zi -ADVPRO/Zn -ADVPRO/Zq -ADVPRO/Zqr -ADVPRO/Zr +ADVPRO/C ADVPRP +ADVPRP/C APP +BYC/GafinMcNpP1Vp +BYC/GafinMcNpP2Vp +BYC/GafinMcNpP3Vp +BYC/GafinMdNpP1TaVp +BYC/GafinMdNpP2TaVp +BYC/GafinMdNpP3TaVp +BYC/GaipMcNsP1Vp +BYC/GaipMcNsP2Vp +BYC/GaipMcNsP3Vp +BYC/GaipMdNsP1TaVp +BYC/GaipMdNsP2TaVp +BYC/GaipMdNsP3TaVp +BYC/GfMcNsP1Vp +BYC/GfMcNsP2Vp +BYC/GfMcNsP3Vp +BYC/GfMdNsP1TaVp +BYC/GfMdNsP2TaVp +BYC/GfMdNsP3TaVp +BYC/GnMcNsP3Vp +BYC/GnMdNsP3TaVp +BYC/GpMcNpP1Vp +BYC/GpMcNpP2Vp +BYC/GpMcNpP3Vp +BYC/GpMdNpP1TaVp +BYC/GpMdNpP2TaVp +BYC/GpMdNpP3TaVp +BYC/MdNpP1TfrVp +BYC/MdNpP2TfrVp +BYC/MdNpP3TfrVp +BYC/MdNsP1TfrVp +BYC/MdNsP2TfrVp +BYC/MdNsP3TfrVp +BYC/MiNpP1Vp +BYC/MiNpP2Vp +BYC/MiNsP2Vp BYC/Vb -BYC/VpMcNpP1Gaifn -BYC/VpMcNpP1Gp -BYC/VpMcNpP2Gaifn -BYC/VpMcNpP2Gp -BYC/VpMcNpP3Gaifn -BYC/VpMcNpP3Gp -BYC/VpMcNsP1Gf -BYC/VpMcNsP1Gpai -BYC/VpMcNsP2Gf -BYC/VpMcNsP2Gpai -BYC/VpMcNsP3Gf -BYC/VpMcNsP3Gn -BYC/VpMcNsP3Gpai -BYC/VpMdTaNpP1Gaifn -BYC/VpMdTaNpP1Gp -BYC/VpMdTaNpP2Gaifn -BYC/VpMdTaNpP2Gp -BYC/VpMdTaNpP3Gaifn -BYC/VpMdTaNpP3Gp -BYC/VpMdTaNsP1Gf -BYC/VpMdTaNsP1Gpai -BYC/VpMdTaNsP2Gf -BYC/VpMdTaNsP2Gpai -BYC/VpMdTaNsP3Gf -BYC/VpMdTaNsP3Gn -BYC/VpMdTaNsP3Gpai -BYC/VpMdTrfNpP1 -BYC/VpMdTrfNpP2 -BYC/VpMdTrfNpP3 -BYC/VpMdTrfNsP1 -BYC/VpMdTrfNsP2 -BYC/VpMdTrfNsP3 -BYC/VpMiNpP1 -BYC/VpMiNpP2 -BYC/VpMiNsP2 CONJ EXCL -N/GaNpCa -N/GaNpCd -N/GaNpCg -N/GaNpCi -N/GaNpCl -N/GaNpCn -N/GaNpCv -N/GaNsCa -N/GaNsCd -N/GaNsCg -N/GaNsCi -N/GaNsCl -N/GaNsCn -N/GaNsCv -N/GfNpCa -N/GfNpCd -N/GfNpCg -N/GfNpCi -N/GfNpCl -N/GfNpCn -N/GfNpCv -N/GfNsCa -N/GfNsCd -N/GfNsCg -N/GfNsCi -N/GfNsCl -N/GfNsCn -N/GfNsCv -N/GiNpCa -N/GiNpCd -N/GiNpCg -N/GiNpCi -N/GiNpCl -N/GiNpCn -N/GiNpCv -N/GiNsCa -N/GiNsCd -N/GiNsCg -N/GiNsCi -N/GiNsCl -N/GiNsCn -N/GiNsCv -N/GnNpCa -N/GnNpCd -N/GnNpCg -N/GnNpCi -N/GnNpCl -N/GnNpCn -N/GnNpCv -N/GnNsCa -N/GnNsCd -N/GnNsCg -N/GnNsCi -N/GnNsCl -N/GnNsCn -N/GnNsCv -N/G?NpCa -N/G*NpCa -N/G?NpCd -N/G*NpCd -N/G?NpCg -N/G*NpCg -N/G?NpCi -N/G*NpCi -N/G?NpCl -N/G*NpCl -N/G?NpCn -N/G*NpCn -N/G?NpCv -N/G*NpCv -N/G?NsCa -N/G?NsCd -N/G?NsCg -N/G?NsCi -N/G?NsCl -N/G?NsCn -N/G?NsCv -N/GpNpCa -N/GpNpCd -N/GpNpCg -N/GpNpCi -N/GpNpCl -N/GpNpCn -N/GpNpCv -N/GpNsCa -N/GpNsCd -N/GpNsCg -N/GpNsCi -N/GpNsCl -N/GpNsCn -N/GpNsCv -NPRO/ZdGnNsCa -NPRO/ZdGnNsCd -NPRO/ZdGnNsCg -NPRO/ZdGnNsCi -NPRO/ZdGnNsCl -NPRO/ZdGnNsCn -NPRO/ZgGnNsCa -NPRO/ZgGnNsCd -NPRO/ZgGnNsCg -NPRO/ZgGnNsCi -NPRO/ZgGnNsCl -NPRO/ZgGnNsCn -NPRO/ZgGpNpCa -NPRO/ZgGpNpCd -NPRO/ZgGpNpCg -NPRO/ZgGpNpCi -NPRO/ZgGpNpCl -NPRO/ZgGpNpCn -NPRO/ZiGnNsCa -NPRO/ZiGnNsCd -NPRO/ZiGnNsCg -NPRO/ZiGnNsCi -NPRO/ZiGnNsCl -NPRO/ZiGnNsCn -NPRO/ZiGpNsCa -NPRO/ZiGpNsCd -NPRO/ZiGpNsCg -NPRO/ZiGpNsCi -NPRO/ZiGpNsCl -NPRO/ZiGpNsCn -NPRO/ZnGnNsCa -NPRO/ZnGnNsCd -NPRO/ZnGnNsCg -NPRO/ZnGnNsCi -NPRO/ZnGnNsCl -NPRO/ZnGnNsCn -NPRO/ZnGpNsCa -NPRO/ZnGpNsCd -NPRO/ZnGpNsCg -NPRO/ZnGpNsCi -NPRO/ZnGpNsCl -NPRO/ZnGpNsCn -NPRO/ZpGaifnNpCa -NPRO/ZpGaifnNpCd -NPRO/ZpGaifnNpCg -NPRO/ZpGaifnNpCi -NPRO/ZpGaifnNpCl -NPRO/ZpGaifnNpCn -NPRO/ZpGfNsCa -NPRO/ZpGfNsCd -NPRO/ZpGfNsCg -NPRO/ZpGfNsCi -NPRO/ZpGfNsCl -NPRO/ZpGfNsCn -NPRO/ZpGnNsCa -NPRO/ZpGnNsCd -NPRO/ZpGnNsCg -NPRO/ZpGnNsCi -NPRO/ZpGnNsCl -NPRO/ZpGnNsCn -NPRO/ZpG*NpCa -NPRO/ZpG*NpCd -NPRO/ZpG*NpCg -NPRO/ZpG*NpCi -NPRO/ZpG*NpCl -NPRO/ZpG*NpCn -NPRO/ZpG*NsCa -NPRO/ZpG*NsCd -NPRO/ZpG*NsCg -NPRO/ZpG*NsCi -NPRO/ZpG*NsCl -NPRO/ZpG*NsCn -NPRO/ZpGpaiNsCa -NPRO/ZpGpaiNsCd -NPRO/ZpGpaiNsCg -NPRO/ZpGpaiNsCi -NPRO/ZpGpaiNsCl -NPRO/ZpGpaiNsCn -NPRO/ZpGpNpCa -NPRO/ZpGpNpCd -NPRO/ZpGpNpCg -NPRO/ZpGpNpCi -NPRO/ZpGpNpCl -NPRO/ZpGpNpCn -NPRO/ZqGnNsCa -NPRO/ZqGnNsCd -NPRO/ZqGnNsCg -NPRO/ZqGnNsCi -NPRO/ZqGnNsCl -NPRO/ZqGnNsCn -NPRO/ZqGpNsCa -NPRO/ZqGpNsCd -NPRO/ZqGpNsCg -NPRO/ZqGpNsCi -NPRO/ZqGpNsCl -NPRO/ZqGpNsCn -NPRO/ZqrGnNsCa -NPRO/ZqrGnNsCd -NPRO/ZqrGnNsCg -NPRO/ZqrGnNsCi -NPRO/ZqrGnNsCl -NPRO/ZqrGnNsCn -NPRO/ZqrGpNsCa -NPRO/ZqrGpNsCd -NPRO/ZqrGpNsCg -NPRO/ZqrGpNsCi -NPRO/ZqrGpNsCl -NPRO/ZqrGpNsCn -NPRO/ZxG*N*Ca -NPRO/ZxG*N*Cd -NPRO/ZxG*N*Cg -NPRO/ZxG*N*Ci -NPRO/ZxG*N*Cl +N/Ca +N/CaGaNp +N/CaGaNs +N/CaGfNp +N/CaGfNs +N/CaGiNp +N/CaGiNs +N/CaGnNp +N/CaGnNs +N/CaG?Np +N/CaG?Ns +N/CaGpNp +N/CaGpNs +N/CaNp +N/Cd +N/CdGaNp +N/CdGaNs +N/CdGfNp +N/CdGfNs +N/CdGiNp +N/CdGiNs +N/CdGnNp +N/CdGnNs +N/CdG?Np +N/CdG?Ns +N/CdGpNp +N/CdGpNs +N/CdNp +N/Cg +N/CgGaNp +N/CgGaNs +N/CgGfNp +N/CgGfNs +N/CgGiNp +N/CgGiNs +N/CgGnNp +N/CgGnNs +N/CgG?Np +N/CgG?Ns +N/CgGpNp +N/CgGpNs +N/CgNp +N/Ci +N/CiGaNp +N/CiGaNs +N/CiGfNp +N/CiGfNs +N/CiGiNp +N/CiGiNs +N/CiGnNp +N/CiGnNs +N/CiG?Np +N/CiG?Ns +N/CiGpNp +N/CiGpNs +N/CiNp +N/Cl +N/ClGaNp +N/ClGaNs +N/ClGfNp +N/ClGfNs +N/ClGiNp +N/ClGiNs +N/ClGnNp +N/ClGnNs +N/ClG?Np +N/ClG?Ns +N/ClGpNp +N/ClGpNs +N/ClNp +N/Cn +N/CnGaNp +N/CnGaNs +N/CnGfNp +N/CnGfNs +N/CnGiNp +N/CnGiNs +N/CnGnNp +N/CnGnNs +N/CnG?Np +N/CnG?Ns +N/CnGpNp +N/CnGpNs +N/CnNp +N/CvGaNp +N/CvGaNs +N/CvGfNp +N/CvGfNs +N/CvGiNp +N/CvGiNs +N/CvGnNp +N/CvGnNs +N/CvG?Np +N/CvG?Ns +N/CvGpNp +N/CvGpNs +N/CvNp +NPRO/Ca +NPRO/CaGafinNp +NPRO/CaGaipNs +NPRO/CaGfNs +NPRO/CaGnNs +NPRO/CaGpNp +NPRO/CaGpNs +NPRO/CaNp +NPRO/CaNs +NPRO/Cd +NPRO/CdGafinNp +NPRO/CdGaipNs +NPRO/CdGfNs +NPRO/CdGnNs +NPRO/CdGpNp +NPRO/CdGpNs +NPRO/CdNp +NPRO/CdNs +NPRO/Cg +NPRO/CgGafinNp +NPRO/CgGaipNs +NPRO/CgGfNs +NPRO/CgGnNs +NPRO/CgGpNp +NPRO/CgGpNs +NPRO/CgNp +NPRO/CgNs +NPRO/Ci +NPRO/CiGafinNp +NPRO/CiGaipNs +NPRO/CiGfNs +NPRO/CiGnNs +NPRO/CiGpNp +NPRO/CiGpNs +NPRO/CiNp +NPRO/CiNs +NPRO/Cl +NPRO/ClGafinNp +NPRO/ClGaipNs +NPRO/ClGfNs +NPRO/ClGnNs +NPRO/ClGpNp +NPRO/ClGpNs +NPRO/ClNp +NPRO/ClNs +NPRO/CnGafinNp +NPRO/CnGaipNs +NPRO/CnGfNs +NPRO/CnGnNs +NPRO/CnGpNp +NPRO/CnGpNs +NPRO/CnNp +NPRO/CnNs NUMCOL/Ca NUMCOL/Cd NUMCOL/Cg @@ -501,66 +332,44 @@ NUMCOL/Ci NUMCOL/Cl NUMCOL/Cn NUMCRD/Ca -NUMCRD/CaGaifn +NUMCRD/CaGafin NUMCRD/CaGain NUMCRD/CaGf NUMCRD/CaGp NUMCRD/Cd NUMCRD/Cg NUMCRD/Ci +NUMCRD/CiGainp NUMCRD/CiGf -NUMCRD/CiGpain NUMCRD/Cl NUMCRD/Cn -NUMCRD/CnGaifn +NUMCRD/CnGafin NUMCRD/CnGain NUMCRD/CnGf NUMCRD/CnGp -NUMCRD/ZiCaGaifn -NUMCRD/ZiCaGain -NUMCRD/ZiCaGf -NUMCRD/ZiCaGp -NUMCRD/ZiCd -NUMCRD/ZiCg -NUMCRD/ZiCi -NUMCRD/ZiCiGf -NUMCRD/ZiCiGpain -NUMCRD/ZiCl -NUMCRD/ZiCnGaifn -NUMCRD/ZiCnGain -NUMCRD/ZiCnGf -NUMCRD/ZiCnGp -NUMCRD/ZqiCaGaifn -NUMCRD/ZqiCaGp -NUMCRD/ZqiCd -NUMCRD/ZqiCg -NUMCRD/ZqiCi -NUMCRD/ZqiCl -NUMCRD/ZqiCnGaifn -NUMCRD/ZqiCnGp -NUMORD/NpCaGp -NUMORD/NpCd -NUMORD/NpCgl -NUMORD/NpCi -NUMORD/NpCnavGaifn -NUMORD/NpCnvGp -NUMORD/NsCaGi -NUMORD/NsCaGpa -NUMORD/NsCaiGf -NUMORD/NsCavGf -NUMORD/NsCdGpain -NUMORD/NsCgdlGf -NUMORD/NsCgGpain -NUMORD/NsCilGpain -NUMORD/NsCnavGn -NUMORD/NsCnvGpai +NUMORD/CaGapNs +NUMORD/CaGiNs +NUMORD/CaGpNp +NUMORD/CaiGfNs +NUMORD/CanvGafinNp +NUMORD/CanvGnNs +NUMORD/CdGainpNs +NUMORD/CdglGfNs +NUMORD/CdNp +NUMORD/CgGainpNs +NUMORD/CglNp +NUMORD/CilGainpNs +NUMORD/CiNp +NUMORD/CnvGaipNs +NUMORD/CnvGfNs +NUMORD/CnvGpNp NUMPAR NUMPAR/Dc NUMPAR/Dp NUMPAR/Ds +NUMPAR/GainpNs +NUMPAR/GfNs NUMPAR/Ns -NUMPAR/NsGf -NUMPAR/NsGpain NV/Ca NV/Cd NV/Cg @@ -571,126 +380,125 @@ ONO P PART P/Ca +P/Cag +P/Cagi P/Cai P/Cal P/Cd +P/Cdg P/Cg -P/Cga -P/Cgai -P/Cgd P/Cgi -P/Ci P/Cl -PPRO/Zp +PPRO/C +V/AiGafinMcNpP1Vp +V/AiGafinMcNpP2Vp +V/AiGafinMcNpP3Vp +V/AiGafinMdNpP1TaVp +V/AiGafinMdNpP2TaVp +V/AiGafinMdNpP3TaVp +V/AiGaipMcNsP1Vp +V/AiGaipMcNsP2Vp +V/AiGaipMcNsP3Vp +V/AiGaipMdNsP1TaVp +V/AiGaipMdNsP2TaVp +V/AiGaipMdNsP3TaVp +V/AiGfMcNsP1Vp +V/AiGfMcNsP2Vp +V/AiGfMcNsP3Vp +V/AiGfMdNsP1TaVp +V/AiGfMdNsP2TaVp +V/AiGfMdNsP3TaVp +V/AiGnMcNsP3Vp +V/AiGnMdNsP3TaVp +V/AiGpMcNpP1Vp +V/AiGpMcNpP2Vp +V/AiGpMcNpP3Vp +V/AiGpMdNpP1TaVp +V/AiGpMdNpP2TaVp +V/AiGpMdNpP3TaVp +V/AiMdNpP1TfrVp +V/AiMdNpP2TfrVp +V/AiMdNpP3TfrVp +V/AiMdNsP1TfrVp +V/AiMdNsP2TfrVp +V/AiMdNsP3TfrVp +V/AiMiNpP1Vp +V/AiMiNpP2Vp +V/AiMiNsP2Vp +V/AiTaVi V/AiVb -V/AiViTa -V/AiVpMcNpP1Gaifn -V/AiVpMcNpP1Gp -V/AiVpMcNpP2Gaifn -V/AiVpMcNpP2Gp -V/AiVpMcNpP3Gaifn -V/AiVpMcNpP3Gp -V/AiVpMcNsP1Gf -V/AiVpMcNsP1Gpai -V/AiVpMcNsP2Gf -V/AiVpMcNsP2Gpai -V/AiVpMcNsP3Gf -V/AiVpMcNsP3Gn -V/AiVpMcNsP3Gpai -V/AiVpMdTaNpP1Gaifn -V/AiVpMdTaNpP1Gp -V/AiVpMdTaNpP2Gaifn -V/AiVpMdTaNpP2Gp -V/AiVpMdTaNpP3Gaifn -V/AiVpMdTaNpP3Gp -V/AiVpMdTaNsP1Gf -V/AiVpMdTaNsP1Gpai -V/AiVpMdTaNsP2Gf -V/AiVpMdTaNsP2Gpai -V/AiVpMdTaNsP3Gf -V/AiVpMdTaNsP3Gn -V/AiVpMdTaNsP3Gpai -V/AiVpMdTrfNpP1 -V/AiVpMdTrfNpP2 -V/AiVpMdTrfNpP3 -V/AiVpMdTrfNsP1 -V/AiVpMdTrfNsP2 -V/AiVpMdTrfNsP3 -V/AiVpMiNpP1 -V/AiVpMiNpP2 -V/AiVpMiNsP2 +V/ApGafinMcNpP1Vp +V/ApGafinMcNpP2Vp +V/ApGafinMcNpP3Vp +V/ApGafinMdNpP1TaVp +V/ApGafinMdNpP2TaVp +V/ApGafinMdNpP3TaVp +V/ApGaipMcNsP1Vp +V/ApGaipMcNsP2Vp +V/ApGaipMcNsP3Vp +V/ApGaipMdNsP1TaVp +V/ApGaipMdNsP2TaVp +V/ApGaipMdNsP3TaVp +V/ApGfMcNsP1Vp +V/ApGfMcNsP2Vp +V/ApGfMcNsP3Vp +V/ApGfMdNsP1TaVp +V/ApGfMdNsP2TaVp +V/ApGfMdNsP3TaVp +V/ApGnMcNsP3Vp +V/ApGnMdNsP3TaVp +V/ApGpMcNpP1Vp +V/ApGpMcNpP2Vp +V/ApGpMcNpP3Vp +V/ApGpMdNpP1TaVp +V/ApGpMdNpP2TaVp +V/ApGpMdNpP3TaVp +V/ApMdNpP1TfrVp +V/ApMdNpP2TfrVp +V/ApMdNpP3TfrVp +V/ApMdNsP1TfrVp +V/ApMdNsP2TfrVp +V/ApMdNsP3TfrVp +V/ApMiNpP1Vp +V/ApMiNpP2Vp +V/ApMiNsP2Vp +V/ApTaVi V/ApVb -V/ApViTa -V/ApVpMcNpP1Gaifn -V/ApVpMcNpP1Gp -V/ApVpMcNpP2Gaifn -V/ApVpMcNpP2Gp -V/ApVpMcNpP3Gaifn -V/ApVpMcNpP3Gp -V/ApVpMcNsP1Gf -V/ApVpMcNsP1Gpai -V/ApVpMcNsP2Gf -V/ApVpMcNsP2Gpai -V/ApVpMcNsP3Gf -V/ApVpMcNsP3Gn -V/ApVpMcNsP3Gpai -V/ApVpMdTaNpP1Gaifn -V/ApVpMdTaNpP1Gp -V/ApVpMdTaNpP2Gaifn -V/ApVpMdTaNpP2Gp -V/ApVpMdTaNpP3Gaifn -V/ApVpMdTaNpP3Gp -V/ApVpMdTaNsP1Gf -V/ApVpMdTaNsP1Gpai -V/ApVpMdTaNsP2Gf -V/ApVpMdTaNsP2Gpai -V/ApVpMdTaNsP3Gf -V/ApVpMdTaNsP3Gn -V/ApVpMdTaNsP3Gpai -V/ApVpMdTrfNpP1 -V/ApVpMdTrfNpP2 -V/ApVpMdTrfNpP3 -V/ApVpMdTrfNsP1 -V/ApVpMdTrfNsP2 -V/ApVpMdTrfNsP3 -V/ApVpMiNpP1 -V/ApVpMiNpP2 -V/ApVpMiNsP2 +V/GafinMcNpP1Vp +V/GafinMcNpP2Vp +V/GafinMcNpP3Vp +V/GafinMdNpP1TaVp +V/GafinMdNpP2TaVp +V/GafinMdNpP3TaVp +V/GaipMcNsP1Vp +V/GaipMcNsP2Vp +V/GaipMcNsP3Vp +V/GaipMdNsP1TaVp +V/GaipMdNsP2TaVp +V/GaipMdNsP3TaVp +V/GfMcNsP1Vp +V/GfMcNsP2Vp +V/GfMcNsP3Vp +V/GfMdNsP1TaVp +V/GfMdNsP2TaVp +V/GfMdNsP3TaVp +V/GiMdNpP1TfrVp +V/GiMdNpP2TfrVp +V/GiMdNpP3TfrVp +V/GiMdNsP1TfrVp +V/GiMdNsP2TfrVp +V/GiMdNsP3TfrVp +V/GiMiNpP1Vp +V/GiMiNpP2Vp +V/GiMiNsP2Vp +V/GiTaVi V/GiVb -V/GiViTa -V/GiVpMcNpP1Gaifn -V/GiVpMcNpP1Gp -V/GiVpMcNpP2Gaifn -V/GiVpMcNpP2Gp -V/GiVpMcNpP3Gaifn -V/GiVpMcNpP3Gp -V/GiVpMcNsP1Gf -V/GiVpMcNsP1Gpai -V/GiVpMcNsP2Gf -V/GiVpMcNsP2Gpai -V/GiVpMcNsP3Gf -V/GiVpMcNsP3Gn -V/GiVpMcNsP3Gpai -V/GiVpMdTaNpP1Gaifn -V/GiVpMdTaNpP1Gp -V/GiVpMdTaNpP2Gaifn -V/GiVpMdTaNpP2Gp -V/GiVpMdTaNpP3Gaifn -V/GiVpMdTaNpP3Gp -V/GiVpMdTaNsP1Gf -V/GiVpMdTaNsP1Gpai -V/GiVpMdTaNsP2Gf -V/GiVpMdTaNsP2Gpai -V/GiVpMdTaNsP3Gf -V/GiVpMdTaNsP3Gn -V/GiVpMdTaNsP3Gpai -V/GiVpMdTrfNpP1 -V/GiVpMdTrfNpP2 -V/GiVpMdTrfNpP3 -V/GiVpMdTrfNsP1 -V/GiVpMdTrfNsP2 -V/GiVpMdTrfNsP3 -V/GiVpMiNpP1 -V/GiVpMiNpP2 -V/GiVpMiNsP2 +V/GnMcNsP3Vp +V/GnMdNsP3TaVp +V/GpMcNpP1Vp +V/GpMcNpP2Vp +V/GpMcNpP3Vp +V/GpMdNpP1TaVp +V/GpMdNpP2TaVp +V/GpMdNpP3TaVp VNI diff --git a/share/pl_PL.ISO-8859-2/lem.bin b/share/pl_PL.ISO-8859-2/lem.bin index ebfd176..781a5f6 100644 Binary files a/share/pl_PL.ISO-8859-2/lem.bin and b/share/pl_PL.ISO-8859-2/lem.bin differ diff --git a/src/compdic/Makefile b/src/compdic/Makefile new file mode 100644 index 0000000..f4e8282 --- /dev/null +++ b/src/compdic/Makefile @@ -0,0 +1,34 @@ +include ../../config.mak + +LDFLAGS += -static +CXXFLAGS += -O2 -fpermissive + +all: compdic aut2fsa + +.PHONY: compdic +compdic: + +aut2fsa: aut2fsa.cc + g++ $(CXXFLAGS) $(LDFLAGS) -o aut2fsa aut2fsa.cc + +.PHONY: install +install: +ifdef BIN_DIR + install -m 0755 compdic $(BIN_DIR) + install -m 0755 fsm2aut $(BIN_DIR) + install -m 0755 aut2fsa $(BIN_DIR) + install -m 0755 lst2fstext $(BIN_DIR) +endif + +.PHONY: uninstall +uninstall: +ifdef BIN_DIR + rm $(BIN_DIR)/compdic + rm $(BIN_DIR)/fsm2aut + rm $(BIN_DIR)/aut2fsa + rm $(BIN_DIR)/lst2fstext +endif + +.PHONY: clean +clean: + rm aut2fsa diff --git a/src/compdic/aut2fsa.cc b/src/compdic/aut2fsa.cc new file mode 100644 index 0000000..a6a695f --- /dev/null +++ b/src/compdic/aut2fsa.cc @@ -0,0 +1,18 @@ + +#include +#include + +#include "../lib/tfti.h" + +#include + +using namespace std; + +int main() +{ + TFTiv a; + a.read(); + a.save(); + + return 0; +} diff --git a/src/compdic/compdic b/src/compdic/compdic new file mode 100755 index 0000000..d111f5c --- /dev/null +++ b/src/compdic/compdic @@ -0,0 +1,176 @@ + +no_of_parts=0 + +while [ $# -gt 2 ] +do + case $1 + in + -p) + no_of_parts=$2 + shift 2 + ;; + + *) + echo "The arguments to use are" + echo "-p: number of parts" + shift 1 + ;; + esac +done + +if [ $# -lt 2 ] +then + echo "Usage:" + echo " compdic [-p ] " + echo "where" + echo " - file containig a list of words, one per line, iso-8859-2 encoded" + echo " - a file to which the compiled automaton (cor/kor format) shoul be written" + exit 0 +fi + +if [ $no_of_parts -eq 0 ] +then + no_of_parts=$(( `cat $1 | wc -l` / 75000 + 1 )) +fi + + +echo number of parts: $no_of_parts + + +tempdir=`mktemp -d /tmp/compdic.XXXXXX` + +alphabet=`tempfile -d $tempdir` + +cat < $alphabet + 0 +a 1 +A 2 +ä 3 +ą 4 +Ą 5 +b 6 +B 7 +c 8 +C 9 +ć 10 +Ć 11 +d 12 +D 13 +e 14 +E 15 +é 16 +ę 17 +Ę 18 +f 19 +F 20 +g 21 +G 22 +h 23 +H 24 +i 25 +I 26 +j 27 +J 28 +k 29 +K 30 +l 31 +L 32 +ł 33 +Ł 34 +m 35 +M 36 +n 37 +N 38 +ń 39 +Ń 40 +o 41 +O 42 +ö 43 +ó 44 +Ó 45 +p 46 +P 47 +q 48 +Q 49 +r 50 +R 51 +s 52 +S 53 +ś 54 +Ś 55 +t 56 +T 57 +u 58 +U 59 +ü 60 +v 61 +V 62 +w 63 +W 64 +x 65 +X 66 +y 67 +Y 68 +z 69 +Z 70 +ź 71 +Ź 72 +ż 73 +Ż 74 +0 75 +1 76 +2 77 +3 78 +4 79 +5 80 +6 81 +7 82 +8 83 +9 84 +_ 85 +- 86 +? 87 +! 88 +~ 89 +; 90 +, 91 +/ 92 +* 93 ++ 94 +EOF + + +no_of_lines=$(( (`cat $1 | wc -l` / $no_of_parts) + 1 )) + +split -l $no_of_lines $1 $tempdir/part. + +automaton=$tempdir/output.fst + +cat < $automaton +EOF + +n=0 + +for f in $tempdir/part.* +do + temp1=`tempfile -d $tempdir` + temp2=`tempfile -d $tempdir` + temp3=`tempfile -d $tempdir` + + n=$(( $n + 1 )) + echo processing part $n + + cat $f |\ + lst2fstext |\ + fstcompile --acceptor --isymbols=$alphabet |\ + fstrmepsilon |\ + fstdeterminize > $temp1 + fstminimize $temp1 $temp2 + + fstunion $automaton $temp2 | fstrmepsilon | fstdeterminize > $temp3 + fstminimize $temp3 $automaton +done + +cat $automaton | fsttopsort | fstprint --acceptor --isymbols=$alphabet > aaaa +cat aaaa | fsm2aut | aut2fsa > $2 +rm -r $tempdir diff --git a/src/compdic/fsm2aut b/src/compdic/fsm2aut new file mode 100755 index 0000000..ee25876 --- /dev/null +++ b/src/compdic/fsm2aut @@ -0,0 +1,44 @@ +#!/usr/bin/perl + +my $currstate=-1; +my @states; +my @final; +my $tn=0; + +while(<>) +{ + if(/^\s*([0-9]+)\s+([0-9]+)\s+(.)(\s*)?$/) + { + push @{$states[$1]}, ($3, $2); + $#states=$2 if $#states<$2; + $tn++; + } + elsif(/^\s*([0-9]+)\s*$/) + { + $final[$1]=1; + $#states=$1 if $#states<$1; + } + else + { + die("Input error."); + } +} + +print scalar(@states)," ",$tn," char void\n"; + +my $i=0; +my $width=int(log(@states+1)/log(10)); +foreach $stateref (@states) +{ + $f = ($final[$i]?"+":"-"); + printf "%${width}d %s",$i++,$f; + while(@$stateref) + { + $c=shift @$stateref; + $s=shift @$stateref; + print " $c $s"; + } + print "\n"; +} + + diff --git a/src/compdic/lst2fstext b/src/compdic/lst2fstext new file mode 100755 index 0000000..0393541 --- /dev/null +++ b/src/compdic/lst2fstext @@ -0,0 +1,21 @@ +#!/usr/bin/env perl + +use locale; + +$s=1; + +$f=1; +while(<>) +{ + chomp; + next if / /; + @cs = split(''); + ++$s; + print "0 $s \n"; + while($c = shift @cs) + { + print $s . ' ' . ++$s . " $c\n"; + } + print "$s $f \n"; +} +print "$f\n"; diff --git a/src/compiledic/Makefile b/src/compdic_utf8/Makefile similarity index 74% rename from src/compiledic/Makefile rename to src/compdic_utf8/Makefile index dc373e8..9462bd6 100644 --- a/src/compiledic/Makefile +++ b/src/compdic_utf8/Makefile @@ -1,14 +1,14 @@ include ../../config.mak -all: compiledic +all: compdic -.PHONY: compiledic +.PHONY: compdic compiledic: .PHONY: install install: ifdef BIN_DIR - install -m 0755 compiledic $(BIN_DIR) + install -m 0755 compdic $(BIN_DIR) install -m 0755 text2fst.py $(BIN_DIR) install -m 0755 symbols.py $(BIN_DIR) endif @@ -16,7 +16,7 @@ endif .PHONY: uninstall uninstall: ifdef BIN_DIR - rm $(BIN_DIR)/compiledic + rm $(BIN_DIR)/compdic rm $(BIN_DIR)/text2fst.py rm $(BIN_DIR)/symbols.py endif diff --git a/src/compiledic/compiledic b/src/compdic_utf8/compdic similarity index 100% rename from src/compiledic/compiledic rename to src/compdic_utf8/compdic diff --git a/src/compiledic/removeBom.sh b/src/compdic_utf8/removeBom.sh similarity index 100% rename from src/compiledic/removeBom.sh rename to src/compdic_utf8/removeBom.sh diff --git a/src/compiledic/symbols.py b/src/compdic_utf8/symbols.py similarity index 100% rename from src/compiledic/symbols.py rename to src/compdic_utf8/symbols.py diff --git a/src/compiledic/text2fst.py b/src/compdic_utf8/text2fst.py similarity index 100% rename from src/compiledic/text2fst.py rename to src/compdic_utf8/text2fst.py diff --git a/src/dgc.rb/dgc.rb b/src/dgc.rb/dgc.rb new file mode 100644 index 0000000..c96983d --- /dev/null +++ b/src/dgc.rb/dgc.rb @@ -0,0 +1,319 @@ +#!/usr/bin/ruby1.9 + +#package: UAM Text Tools +#component: dgc (dg compiler) +#version: 2.0 +#author: Tomasz Obrebski + +use lib "/usr/local/lib/utt"; +use lib "$ENV{'HOME'}/.local/lib/utt"; + +use strict; +use Getopt::Long; +use Data::Dumper; +use attr; +use File::HomeDir; + +my $systemconfigfile='/usr/local/etc/utt/dgc.conf'; +my $userconfigfile=home()."/.utt/dgc.conf"; + +Getopt::Long::Configure('no_ignore_case_always'); + +my $help=0; +my $catfile=0; +my $dicfile=0; +my $gramfile=0; +my $outputfile=0; + +#read configuration files########################### +my $file; +foreach $file ($systemconfigfile, $userconfigfile){ + if(open(CONFIG, $file)){ + while () { + chomp; + s/#.*//; + s/^\s+//; + s/\s+$//; + next unless length; + my ($name, $value) = split(/\s*=\s*/, $_, 2); + if(($name eq "categories")or($name eq "c")){ + $catfile=$value; + } + elsif(($name eq "dictionary")or($name eq "d")){ + $dicfile=$value; + } + elsif(($name eq "grammar")or($name eq "g")){ + $gramfile=$value; + } + elsif(($name eq "outputfile")or($name eq "o")){ + $outputfile=$value; + } + elsif(($name eq "help")or($name eq "h")){ + $help=1; + } + + } + close CONFIG; + } +} +######################################################### + +GetOptions("help|h" => \$help, + "categories|c=s" => \$catfile, + "dictionary|d=s" => \$dicfile, + "grammar|g=s" => \$gramfile, + "outputfile|o=s" => \$outputfile); + +my $homedir = $ENV{'HOME'}; +$catfile =~ s/~/$homedir/; +$dicfile =~ s/~/$homedir/; +$gramfile =~ s/~/$homedir/; +$outputfile =~ s/~/$homedir/; + + +if($help) +{ + print <<'END' +Usage: dgc [OPTIONS] + +Options: + --categories -c filename List of syntactic categories. + --dictionary -d filename Dictionary. + --grammar -g filename List of grammar rules. + --outputfile -o filename Output file name. + --help -h Help. +END +; + exit 0; +} + +die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; + +my $ncat=0; +my $nrole=0; +my $nsgl=0; +my $nleft=0; +my $nright=0; +my $nreq=0; +my $nlink=0; +my $nflag=0; +my $nlong=0; +my $nclass=0; + +my %cats; +my %classes; +my %roles; +my %agr; +my %gov; + +if(!$outputfile) { + *OUTPUT = *STDOUT; +} +elsif($outputfile eq "-") { + *OUTPUT = *STDOUT; +} +else { + open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); +} + + +loadcats($catfile) if $catfile; +extractcats($dicfile) if $dicfile; + + +my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; + +my $class_re = qr/(?:\@\w+)/; + +# class parse_class: +# /$attr::cat_re/g; + + +if(!$gramfile) { + *INPUT = *STDIN; +} +elsif($gramfile eq "-"){ + *INPUT = *STDIN; +} +else { + open(INPUT, $gramfile) or die("Unable to open: $gramfile!"); +} + +while() +{ + s/#.*//; + s/^\s+//; + s/\s+$//; + if(/^AGR\s+(\S+)\s+(\S+)$/) + { + push @{$agr{$1}}, $2; + } + elsif(/^GOV\s+(\S+)\s+(\S+)$/) + { + push @{$gov{$1}}, attr::parse($2); + } + elsif(/^ROLE\s+\S+$/) + { + $roles{$_}=1; + print OUTPUT "$_\n"; + } + elsif(/^SGL\s+\S+$/) + { + ++$nsgl; + print OUTPUT "$_\n"; + } + elsif(/^REQ\s+(\S+)\s+(\S+)$/) + { + print OUTPUT "#$_\n"; + my $cat = attr::parse $1; + for my $atomcat (keys %cats) + { + if(attr::match @$cat, @{$cats{$atomcat}}) + { + print OUTPUT "REQ ".$atomcat." $2\n"; + ++$nreq; + } + } + } + elsif(/^LEFT\s+\S+$/) + { + ++$nleft; + print OUTPUT "$_\n"; + } + elsif(/^RIGHT\s+\S+$/) + { + ++$nright; + print OUTPUT "$_\n"; + } + elsif(my ($hs,$ds,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)$/) + { + print OUTPUT "#$_\n"; + for my $h ($hs =~ /$attr::cat_re/g) + { + for my $d ($ds =~ /$attr::cat_re/g) + { + addlinks($h,$d,$r); + } + } + } + elsif(/^FLAG\s+\S+$/) + { + ++$nflag; + print OUTPUT "$_\n" + } + elsif(/^LONG\s+\S+(\s+<\S+)*(\s+\S+)*$/) + { + ++$nlong; + print OUTPUT "$_\n" + } + elsif(my ($cl,$cs) = /^CLASS\s+(\S+)\s*\(.*)/) + { + print OUTPUT "#$_\n"; + for my $c ($cs =~ /\S+/g) + { + my $cat = attr::parse $c; + + for my $atomcat (sort(keys %cats)) + { + if(attr::match @$cat, @{$cats{$atomcat}}) + { + print OUTPUT "CLASS $cl $atomcat\n"; + ++$nclass; + } + } + } + } + elsif(/^$/) { + # pomijamy puste linie oraz komentarze + } + else + { + print STDERR "Illegal format: $_\n"; + } +} + + +sub addlinks +{ + my ($h,$d,$r) = @_; + + for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; } + for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; } + my $head = attr::parse $h; + my $dep = attr::parse $d; + + for my $atomhead (keys %cats) + { + if(attr::match @$head, @{$cats{$atomhead}}) + { + DEP: + for my $atomdep (keys %cats) + { + next DEP if ! attr::match @$dep, @{$cats{$atomdep}}; + + for my $a (@{$agr{$r}}) + { + next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a); + } + + for my $c (@{$gov{$r}}) + { + next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); + } + + print OUTPUT "LINK "; + print OUTPUT $atomhead." "; + print OUTPUT $atomdep." $r\n"; + ++$nlink; + + } + } + } +} + + +printf STDERR "%6d CAT statements\n", 0+keys(%cats); +printf STDERR "%6d ROLE statements\n", 0+keys(%roles); +printf STDERR "%6d SGL statements\n", $nsgl; +printf STDERR "%6d REQ statements\n", $nreq; +printf STDERR "%6d LEFT statements\n", $nleft; +printf STDERR "%6d RIGHT statements\n", $nright; +printf STDERR "%6d LINK statements\n", $nlink; +printf STDERR "%6d CLASS statements\n", $nclass; +printf STDERR "%6d FLAG statements\n", $nflag; + + +sub extractcats +{ + my $file = shift; + open DICFILE, "$file"; + while() + { + while(/,([^[:space:];]+)/g) + { + my $cat=$1; + next if !$cat || exists $cats{$cat}; + $ncat++; + print OUTPUT "CAT $1\n"; + $cats{$cat}=attr::parse($cat); + } + } + close DICFILE; +} + + +sub loadcats +{ + my $file = shift; + open CATFILE, "$file"; + while() + { + tr/ \t\n//d; + next if !$_ || exists $cats{$_}; + print OUTPUT "CAT $_\n"; + ++$ncat; + $cats{$_}=attr::parse($_); + } + close CATFILE; +} + diff --git a/src/dgc/Makefile b/src/dgc/Makefile new file mode 100644 index 0000000..bb2d23b --- /dev/null +++ b/src/dgc/Makefile @@ -0,0 +1,17 @@ +include ../../config.mak + +dgc: + +.PHONY: install +install: +ifdef BIN_DIR + install -m 0755 dgc $(BIN_DIR) +endif + +.PHONY: uninstall +uninstall: +ifdef BIN_DIR + rm $(BIN_DIR)/dgc +endif + +clean: diff --git a/src/dgc/dgc b/src/dgc/dgc new file mode 100755 index 0000000..ba38796 --- /dev/null +++ b/src/dgc/dgc @@ -0,0 +1,473 @@ +#!/usr/bin/perl + +#package: UAM Text Tools +#component: dgc (dg compiler) +#version: 1.0 +#author: Tomasz Obrebski + +use lib "/usr/local/lib/utt"; +use lib "$ENV{'HOME'}/.local/lib/utt"; + +use strict; +use Getopt::Long; +use Data::Dumper; +use attr; +use File::HomeDir; + +my $systemconfigfile='/etc/utt/dgc.conf'; +my $userconfigfile=home()."/.utt/dgc.conf"; + +Getopt::Long::Configure('no_ignore_case_always'); + +my $help=0; +my $catfile=0; +my $dicfile=0; +my $gramfile=0; +my $outputfile=0; + +#read configuration files########################### +my $file; +foreach $file ($systemconfigfile, $userconfigfile){ + if(open(CONFIG, $file)){ + while () { + chomp; + s/#.*//; + s/^\s+//; + s/\s+$//; + next unless length; + my ($name, $value) = split(/\s*=\s*/, $_, 2); + if(($name eq "categories")or($name eq "c")){ + $catfile=$value; + } + elsif(($name eq "dictionary")or($name eq "d")){ + $dicfile=$value; + } + elsif(($name eq "grammar")or($name eq "g")){ + $gramfile=$value; + } + elsif(($name eq "outputfile")or($name eq "o")){ + $outputfile=$value; + } + elsif(($name eq "help")or($name eq "h")){ + $help=1; + } + + } + close CONFIG; + } +} +######################################################### + +GetOptions("help|h" => \$help, + "categories|c=s" => \$catfile, + "dictionary|d=s" => \$dicfile, + "grammar|g=s" => \$gramfile, + "outputfile|o=s" => \$outputfile); + +my $homedir = $ENV{'HOME'}; +$catfile =~ s/~/$homedir/; +$dicfile =~ s/~/$homedir/; +$gramfile =~ s/~/$homedir/; +$outputfile =~ s/~/$homedir/; + + +if($help) +{ + print <<'END' +Usage: dgc [OPTIONS] + +Options: + --categories -c filename List of syntactic categories. + --dictionary -d filename Dictionary. + --grammar -g filename List of grammar rules. + --outputfile -o filename Output file name. + --help -h Help. +END +; + exit 0; +} + +die("At least one of --cats and --dic must be given.\n") if !$catfile && !$dicfile; + +my $ncat=0; +my $nrole=0; +my $nsgl=0; +my $nleft=0; +my $nright=0; +my $ninitr=0; +my $nfinr=0; +my $ninitf=0; +my $nfinf=0; +my $ninitc=0; +my $nfinc=0; +my $nreq=0; +my $nlink=0; +my $nflag=0; +my $nset=0; +my $npass=0; +my $nlong=0; +my $nconstr=0; +my $nclass=0; + +my %cats; +my %roles; +my %agr; +my %gov; + +if(!$outputfile) { + *OUTPUT = *STDOUT; +} +elsif($outputfile eq "-") { + *OUTPUT = *STDOUT; +} +else { + open(OUTPUT, ">$outputfile") or die("Can't open output file: $outputfile!"); +} + + +loadcats($catfile) if $catfile; +extractcats($dicfile) if $dicfile; + + +my $cats_re = qr/(?:$attr::cat_re\s*(?:,\s*$attr::cat_re)*)/; +my $class_re = qr/(?:\@\w+)/; + +my $avlist_re = $attr::avlist_re; + +my $role_re = qr/(?:[[:lower:][:digit:]_]+)/; +my $prop_re = qr/(?:\&[[:upper:]]+)/; +my $proplist_re = qr/(?:$prop_re+)/; + +# class parse_class: +# /$attr::cat_re/g; + + +if(!$gramfile) { + *INPUT = *STDIN; +} +elsif($gramfile eq "-"){ + *INPUT = *STDIN; +} +else { + open(INPUT, $gramfile) or die("Unable to open: $gramfile!"); +} + +while() +{ + s/#.*//; + s/^\s+//; + s/\s+$//; + if(/^AGR\s+(\S+)\s+(\S+)$/) + { + push @{$agr{$1}}, $2; + } + elsif(/^GOV\s+(\S+)\s+(\S+)$/) + { + push @{$gov{$1}}, attr::parse($2); + } + elsif(/^ROLE\s+\S+$/) + { + $roles{$_}=1; + print OUTPUT "$_\n"; + } + elsif(/^SGL\s+\S+$/) + { + ++$nsgl; + print OUTPUT "$_\n"; + } + elsif(/^REQ\s+(\S+)\s+(\S+)$/) + { + print OUTPUT "#$_\n"; + my $cat = attr::parse $1; + for my $atomcat (keys %cats) + { + if(attr::match @$cat, @{$cats{$atomcat}}) + { + print OUTPUT "REQ ".$atomcat." $2\n"; + ++$nreq; + } + } + } + elsif(/^LEFT\s+\S+$/) + { + ++$nleft; + print OUTPUT "$_\n"; + } + elsif(/^RIGHT\s+\S+$/) + { + ++$nright; + print OUTPUT "$_\n"; + } + elsif(/^INIT\s+[[:lower:]]\S*$/) + { + ++$ninitr; + s/INIT/INITR/; + print OUTPUT "$_\n"; + } + elsif(/^FIN\s+[[:lower:]]\S*$/) + { + ++$nfinr; + s/FIN/FINR/; + print OUTPUT "$_\n"; + } + elsif(/^INIT\s+[[:upper:]]+[+-]$/) + { + ++$ninitf; + s/INIT/INITF/; + s/[+-]//g; + print OUTPUT "$_\n"; + } + elsif(/^FIN\s+[[:upper:]]+$/) + { + ++$nfinf; + s/FIN/FINF/; + s/[+-]//g; + print OUTPUT "$_\n"; + } + # elsif(/^INIT\s+([[:upper:]]\S*)$/) + # { + # print OUTPUT "#$_\n"; + # my $cat = attr::parse $1; + # for my $atomcat (keys %cats) + # { + # if(attr::match @$cat, @{$cats{$atomcat}}) + # { + # print OUTPUT "INITC ".$atomcat."\n"; + # ++$ninitc; + # } + # } + # } + # elsif(/^FIN\s+([[:upper:]]\S*)$/) + # { + # print OUTPUT "#$_\n"; + # my $cat = attr::parse $1; + # for my $atomcat (keys %cats) + # { + # if(attr::match @$cat, @{$cats{$atomcat}}) + # { + # print OUTPUT "FINC ".$atomcat."\n"; + # ++$nfinc; + # } + # } + # } + elsif(my ($hs,$hfs,$ds,$dfs,$r,$rprops) = /^LINK\s+($cats_re)((?:;$avlist_re)?)\s+($cats_re)((?:;$avlist_re)?)\s+($role_re)((?:$proplist_re)?)$/) + { + print OUTPUT "#$_\n"; + for my $h ($hs =~ /$attr::cat_re/g) + { + for my $d ($ds =~ /$attr::cat_re/g) + { + addlinks($h,$hfs,$d,$dfs,$r,$rprops); + } + } + } + # elsif(my ($hs,$ds,$fs,$r) = /^LINK\s+($cats_re)\s+($cats_re)\s+(\S+)\s+(\S+)$/) + # { + # print OUTPUT "#$_\n"; + # for my $h ($hs =~ /$attr::cat_re/g) + # { + # for my $d ($ds =~ /$attr::cat_re/g) + # { + # addlinks1($h,$d,$fs,$r); + # } + # } + # } + elsif(/^FLAG\s+\S+$/) + { + ++$nflag; + print OUTPUT "$_\n" + } + elsif(/^SET\s+(\S+)\s+(\S+)$/) + { + print OUTPUT "#$_\n"; + my $cat = attr::parse $1; + my $flag = $2; + for my $atomcat (keys %cats) + { + if(attr::match @$cat, @{$cats{$atomcat}}) + { + print OUTPUT "SET ".$atomcat." $flag\n"; + ++$nset; + } + } + } + elsif(/^PASS\s+\S+\s+\S+$/) + { + ++$npass; + print OUTPUT "$_\n" + } + elsif(/^CONSTR[IE]\s+\S+\s+\S+$/) + { + ++$nconstr; + print OUTPUT "$_\n" + } + elsif(/^LONG\s+(\S+)((\s+<\S+)*)((\s+\S+>)*)$/) + { + ++$nlong; + my $rel = $1; + my $ups = $2; + my $downs = $4; + + $ups =~ s///g; + $downs =~ s/^\s+//; + my @down = split(/\s+/,$downs); + print OUTPUT "LONG $rel " . join(",",@up) . "^" . join(",",@down) . "\n"; + } + elsif(my ($cl,$cs) = /^CLASS\s+(\S+)\s*\:\s*(.*)/) + { + print OUTPUT "#$_\n"; + for my $c ($cs =~ /\S+/g) + { + my $cat = attr::parse $c; + + for my $atomcat (sort(keys %cats)) + { + if(attr::match @$cat, @{$cats{$atomcat}}) + { + print OUTPUT "CLASS $cl $atomcat\n"; + ++$nclass; + } + } + } + } + elsif(/^$/) { + # pomijamy puste linie oraz komentarze + } + else + { + print STDERR "Illegal format: $_\n"; + } +} + + +sub addlinks +{ + my ($h,$hfs,$d,$dfs,$r,$rprops) = @_; + + for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; } + for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; } + my $head = attr::parse $h; + my $dep = attr::parse $d; + + for my $atomhead (keys %cats) + { + if(attr::match @$head, @{$cats{$atomhead}}) + { + DEP: + for my $atomdep (keys %cats) + { + next DEP if ! attr::match @$dep, @{$cats{$atomdep}}; + + for my $a (@{$agr{$r}}) + { + next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a); + } + + for my $c (@{$gov{$r}}) + { + next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); + } + + print OUTPUT "LINK $atomhead$hfs $atomdep$dfs $r$rprops\n"; + ++$nlink; + + } + } + } +} + + +sub addlinks1 +{ + my ($h,$d,$fs,$r) = @_; + + for my $a (@{$agr{$r}}) { print OUTPUT "#AGR $r $a\n"; } + for my $c (@{$gov{$r}}) { print OUTPUT "#GOV $r ".attr::unparse(@$c)."\n"; } + my $head = attr::parse $h; + my $dep = attr::parse $d; + + for my $atomhead (keys %cats) + { + if(attr::match @$head, @{$cats{$atomhead}}) + { + DEP: + for my $atomdep (keys %cats) + { + next DEP if ! attr::match @$dep, @{$cats{$atomdep}}; + + for my $a (@{$agr{$r}}) + { + next DEP if ! attr::agree(@{$cats{$atomhead}},@{$cats{$atomdep}},$a); + } + + for my $c (@{$gov{$r}}) + { + next DEP if ! attr::match(@$c,@{$cats{$atomdep}}); + } + + print OUTPUT "LINK $atomhead $atomdep $fs $r\n"; + ++$nlink; + + } + } + } +} + + +printf STDERR "%6d CAT statements\n", 0+keys(%cats); +printf STDERR "%6d ROLE statements\n", 0+keys(%roles); +printf STDERR "%6d SGL statements\n", $nsgl; +printf STDERR "%6d REQ statements\n", $nreq; +printf STDERR "%6d LEFT statements\n", $nleft; +printf STDERR "%6d RIGHT statements\n", $nright; +printf STDERR "%6d INITR statements\n", $ninitr; +printf STDERR "%6d FINR statements\n", $nfinr; +printf STDERR "%6d INITF statements\n", $ninitf; +printf STDERR "%6d FINF statements\n", $nfinf; +printf STDERR "%6d INITC statements\n", $ninitc; +printf STDERR "%6d FINC statements\n", $nfinc; +printf STDERR "%6d LINK statements\n", $nlink; +printf STDERR "%6d CLASS statements\n", $nclass; +printf STDERR "%6d FLAG statements\n", $nflag; +printf STDERR "%6d SET statements\n", $nset; +printf STDERR "%6d PASS statements\n", $npass; + + +sub extractcats +{ + my $file = shift; + open DICFILE, "$file"; + while() + { + while(/,([^[:space:];]+)/g) + { + my $cat=$1; + next if !$cat || exists $cats{$cat}; + $ncat++; + print OUTPUT "CAT $1\n"; + $cats{$cat}=attr::parse($cat); + } + } + close DICFILE; +} + + +sub loadcats +{ + my $file = shift; + open CATFILE, "$file"; + while() + { + tr/ \t\n//d; + next if !$_ || exists $cats{$_}; + print OUTPUT "CAT $_\n"; + ++$ncat; + $cats{$_}=attr::parse($_); + } + close CATFILE; +} + diff --git a/src/dgp.old/Makefile b/src/dgp.old/Makefile new file mode 100644 index 0000000..4c8e1d3 --- /dev/null +++ b/src/dgp.old/Makefile @@ -0,0 +1,72 @@ +include ../../config.mak + +SHELL = /bin/sh +LIB_PATH=../../lib +COMMON_PATH=../common +CMDLINE_FILE='"../dgp/cmdline.h"' + +#vpath %.o . + +ifeq ($(BUILD_STATIC), yes) + LDFLAGS += -static +endif + +CXXFLAGS += -O2 + +sources = main.cc grammar.cc symbol.cc mgraph.cc sgraph.cc dgp0.cc cmdline.cc \ + $(COMMON_PATH)/common.cc global.cc + +bin = dgp + +# plik *.o sa umieszczane w podkatalogu o +objs = $(sources:%.cc=%.o) + +${bin}: ${objs} + $(CXX) $(CXXFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) -o $@ ${objs} $(LDFLAGS) + +include $(sources:.cc=.d) + +%.o: %.cc + $(CXX) -D _CMDLINE_FILE=$(CMDLINE_FILE) -c ${CXXFLAGS} -o $@ $< + +%.d: %.cc + $(CC) -MM $(CPPFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) $< > $@.$$$$; \ + sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +# stare: +# cmdline.cc cmdline.h : cmdline.ggo +# gengetopt --c-extension=cc -i cmdline.ggo +# nowe +cmdline.cc cmdline.h: cmdline.ggo + $(GENGETOPT) -i cmdline.ggo --c-extension=cc --conf-parser + +cmdline.ggo: cmdline_dgp.ggo ../common/cmdline_common.ggo + cat cmdline_dgp.ggo ../common/cmdline_common.ggo > cmdline.ggo +# endnowe + + +clean: + rm ${bin} ${objs} cmdline.cc cmdline.h + rm -rf *.d + +prof: dgp + gprof dgp ~/tmp/dgp-pl/gmon.out > dgp.prof + +.PHONY: install +install: +ifdef BIN_DIR + install -m 0755 dgp $(BIN_DIR) + install -m 0755 dgc $(BIN_DIR) + install -m 0755 canonize $(BIN_DIR) + install -m 0755 tre $(BIN_DIR) +endif + +.PHONY: uninstall +uninstall: +ifdef BIN_DIR + rm $(BIN_DIR)/dgp + rm $(BIN_DIR)/dgc + rm $(BIN_DIR)/canonize + rm $(BIN_DIR)/tre +endif diff --git a/src/dgp/Makefile.user b/src/dgp.old/Makefile.user similarity index 100% rename from src/dgp/Makefile.user rename to src/dgp.old/Makefile.user diff --git a/src/dgp/canonize b/src/dgp.old/canonize similarity index 100% rename from src/dgp/canonize rename to src/dgp.old/canonize diff --git a/src/dgp/cmdline.ggo b/src/dgp.old/cmdline.ggo similarity index 100% rename from src/dgp/cmdline.ggo rename to src/dgp.old/cmdline.ggo diff --git a/src/dgp.old/cmdline_dgp.ggo b/src/dgp.old/cmdline_dgp.ggo new file mode 100644 index 0000000..c59786d --- /dev/null +++ b/src/dgp.old/cmdline_dgp.ggo @@ -0,0 +1,18 @@ +package "dgp" +version "0.1" + +option "grammar" g "Grammar file" + string no typestr="filename" + +option "long" l "Long output" + flag off + +option "debug" d "Debug mode." + flag off + +option "info" - "Print info. +h - heads d - dependents +s - sets +c - constraints n - node/arc counts t - parse time +" +string no default="h" diff --git a/src/dgp.old/const.hh b/src/dgp.old/const.hh new file mode 100644 index 0000000..7077f81 --- /dev/null +++ b/src/dgp.old/const.hh @@ -0,0 +1,13 @@ +#ifndef CONST_HH +#define CONST_HH + +#define MAXTYPES 32 +#define MAXFLAGS 64 +#define MAXNODES 1024 +#define MAXCONSTRS 32 +#define MAXLINE 256 +#define MAXFORMLEN 64 +#define MAXDESCRLEN 80 +#define FIELDSEP " \n\t" + +#endif diff --git a/src/dgp/dgc b/src/dgp.old/dgc similarity index 100% rename from src/dgp/dgc rename to src/dgp.old/dgc diff --git a/src/dgp.old/dgp0.cc b/src/dgp.old/dgp0.cc new file mode 100644 index 0000000..24faeb7 --- /dev/null +++ b/src/dgp.old/dgp0.cc @@ -0,0 +1,217 @@ +#include "dgp0.hh" +#include "global.hh" + +extern Grammar grammar; +extern MGraph mgraph; +extern SGraph sgraph; + +SNode* snodes; + +extern bool debug; + +list nodelist; +list::iterator processed; + + +void set_initial_constraints(int node) +{ + snodes[node].prop.forbidden.reset(); + snodes[node].prop.required=grammar.obl[snodes[node].mnode->cat]; +} + + +bool changing_constraints(int head, Role role) +{ + return grammar.sgl[role] || snodes[head].prop.required[role]; +} + +void apply_constraints(int head, Role role) +{ + if(grammar.sgl[role]) snodes[head].prop.forbidden.set(role); + snodes[head].prop.required.reset(role); +} + +NodeProp compute_prop_left(NodeProp headprop, Role role) +{ + NodeProp ret=headprop; + if(grammar.sgl[role]) ret.forbidden.set(role); + ret.required.reset(role); + return ret; +} + +NodeProp compute_prop_right(NodeProp headprop, Role role) +{ + NodeProp ret=headprop; + + if(grammar.sgl[role]) ret.forbidden.set(role); + ret.required.reset(role); + return ret; +} + +int get_node(MNode& mnode, NodeProp p, bitset& newheadLH, bitset& newheadLV) +{ + for(vector::iterator ps=mnode.snodes.begin(); ps!=mnode.snodes.end(); ++ps) + if(snodes[*ps].prop==p && snodes[*ps].LH==newheadLH && snodes[*ps].LV==newheadLV) + return *ps; + return -1; +} + +void connect_left(list::iterator h, list::iterator d, Role r) +{ + NodeProp &oldheadprop = snodes[*h].prop; + NodeProp newheadprop; + bitset newheadLV; + bitset newheadLH; + bitset newheadLD; + + newheadprop=compute_prop_left(oldheadprop,r); + + int newheadind; + if(oldheadprop==newheadprop) + newheadind = *h; + else + { + newheadLH = snodes[*h].LH; + newheadLV = snodes[*d].LV; + newheadLD = snodes[*h].LD; + + newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV); + if( newheadind < 0 ) + { + newheadind = sgraph.clone(*h,newheadprop); + list::iterator nextit=h; ++nextit; + nodelist.insert(nextit,newheadind); + snodes[newheadind].LH=newheadLH; + snodes[newheadind].in_LH=true; + snodes[newheadind].LV.reset(); + snodes[newheadind].LD = newheadLD; + + if(debug) sgraph.print_node_debug(stderr," C ",newheadind); + } + else + snodes[newheadind].LD |= newheadLD; // TYLKO DLA LD + } + + snodes[newheadind].deps.push_back(Arc(*d,r,*h)); + + if(snodes[*d].saturated()) snodes[newheadind].LV |= snodes[*d].LV; + snodes[newheadind].LD.set(*d); + if(snodes[*d].saturated()) snodes[newheadind].LD |= snodes[*d].LD; + + if(debug) + sgraph.print_arc(stderr,*d,newheadind,r,0), sgraph.print_node_debug(stderr," U ",newheadind); +} + + +void connect_right(list::iterator h, list::iterator d, Role r) +{ + NodeProp &oldheadprop = snodes[*h].prop; + NodeProp newheadprop; + bitset newheadLV; + bitset newheadLH; + bitset newheadLD; + int newheadind; + + newheadprop = compute_prop_right(oldheadprop,r); + if(oldheadprop==newheadprop) + newheadind = *h; + else + { + newheadLH = snodes[*h].LH; + newheadLV = snodes[*h].LV; + newheadLD = snodes[*h].LD; + + newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV); + if( newheadind < 0 ) + { + newheadind = sgraph.clone(*h,newheadprop); + snodes[newheadind].LH=newheadLH; + snodes[newheadind].in_LH=false; + snodes[newheadind].LV=newheadLV; + snodes[newheadind].LD=newheadLD; + list::iterator nextit=h; ++nextit; + nodelist.insert(nextit,newheadind); + + if(debug) sgraph.print_node_debug(stderr," C ",newheadind); + } + else + snodes[newheadind].LD |= newheadLD; // TYLKO DLA LD + } + + snodes[*d].heads.push_back(Arc(newheadind,r,*h)); + + snodes[*d].LH.set(newheadind); + + if(snodes[newheadind].saturated()) snodes[*d].LH |= snodes[newheadind].LH; + + if(debug) + sgraph.print_arc(stderr,newheadind,*d,r,1), sgraph.print_node_debug(stderr," U ",*d); + +} + + +void try_connect_dependents(list::iterator j) +{ + for(list::iterator i(j); i!=nodelist.begin(); --i) + if(sgraph.visible(*i,*j) && sgraph.saturated(*i)) + { + Roles& ji_roles = grammar.connect[snodes[*j].mnode->cat][snodes[*i].mnode->cat]; + for(RolesIter r=ji_roles.begin(); r!=ji_roles.end();++r) + if(grammar.check_constr(snodes[*j].prop,snodes[*i].prop,0,*r)) + connect_left(j,i,*r); + } +} + + +void try_connect_heads(list::iterator j) +{ + for(list::iterator i(j); i!=nodelist.begin(); --i) + if(sgraph.visible(*i,*j)) + { + Roles& ij_roles = grammar.connect[snodes[*i].mnode->cat][snodes[*j].mnode->cat]; + for(RolesIter r=ij_roles.begin(); r!=ij_roles.end();++r) + if(grammar.check_constr(snodes[*i].prop,snodes[*j].prop,1,*r)) + connect_right(i,j,*r); + } +} + + +void reverse_links() +{ + list::iterator i = nodelist.begin(); + for(++i; i!=nodelist.end(); ++i) + { + for(vector::iterator da=sgraph.nodes[*i].deps.begin()--; da!=sgraph.nodes[*i].deps.end(); ++da) + sgraph.nodes[da->dst].heads.push_back(Arc(*i,da->role,da->anc)); + for(vector::iterator ha=sgraph.nodes[*i].heads.begin(); ha!=sgraph.nodes[*i].heads.end(); ++ha) + sgraph.nodes[ha->dst].deps.push_back(Arc(*i,ha->role,ha->anc)); + } +} + + +void dgp0() +{ + snodes=sgraph.nodes; + + nodelist.clear(); + nodelist.push_back(0); // BOS + processed=nodelist.begin(); + + for(int m=0; m::iterator cursor=processed; + while(++cursor != nodelist.end()) + { + try_connect_dependents(cursor); + try_connect_heads(cursor); + processed=cursor; + } + } + reverse_links(); +} diff --git a/src/dgp.old/dgp0.hh b/src/dgp.old/dgp0.hh new file mode 100644 index 0000000..1a135da --- /dev/null +++ b/src/dgp.old/dgp0.hh @@ -0,0 +1,12 @@ +#ifndef _DGP0_HH +#define _DGP0_HH + +#include "grammar.hh" +#include "sgraph.hh" +#include "mgraph.hh" + +// API + +void dgp0(); + +#endif diff --git a/src/dgp.old/global.cc b/src/dgp.old/global.cc new file mode 100644 index 0000000..80a32f5 --- /dev/null +++ b/src/dgp.old/global.cc @@ -0,0 +1,5 @@ + +#include "global.hh" + +bool debug = false; + diff --git a/src/dgp.old/global.hh b/src/dgp.old/global.hh new file mode 100644 index 0000000..4af6605 --- /dev/null +++ b/src/dgp.old/global.hh @@ -0,0 +1 @@ +extern bool debug; diff --git a/src/dgp/go b/src/dgp.old/go similarity index 100% rename from src/dgp/go rename to src/dgp.old/go diff --git a/src/dgp.old/grammar.cc b/src/dgp.old/grammar.cc new file mode 100644 index 0000000..63e2882 --- /dev/null +++ b/src/dgp.old/grammar.cc @@ -0,0 +1,181 @@ + +#include + +#include "grammar.hh" + +bool (*constraint[MAXCONSTRS])(int head, int dep); + + +int chk_type(const char* s, int lineno) // SIDE EFECTS! +{ + if(Role::index(s)>0) return 1; + + fprintf(stderr,"%8d: Invalid type '%s'. Line ignored.\n",lineno,s); + return 0; +} + +int chk_cat(const char* s, int lineno) +{ + if(Cat::index(s)>0) return 1; + + fprintf(stderr,"%8d: Invalid category '%s'. Line ignored.\n",lineno,s); + return 0; +} + +void Grammar::add_category(const char* s) +{ + Cat::add(s); + if(Cat::count()>cats_sz) + { + cats_sz += 16; + connect.resize(cats_sz); + for(int i=0; itypes_sz) + { + types_sz += 16; + lt.resize(types_sz); + gt.resize(types_sz); + } +} + +void Grammar::add_flag(const char* s) +{ + Flag::add(s); + if(Flag::count()>flags_sz) + { + flags_sz += 16; + pass.resize(flags_sz); + } +} + + +void Grammar::set_lt(Role s, Role t) +{ + lt[s].set(t); + gt[t].set(s); + if(s==0||(int)t==0) + return; + else + { + for(int i=0; i=2) + { + add_category(arg1); + } + else if(strcmp(key,"ROLE")==0 && fields>=2) + { + add_type(arg1); + } + else if(strcmp(key,"SGL")==0 && fields>=2) + { + if(chk_type(arg1,lineno)) + set_sgl(arg1); + } + else if(strcmp(key,"LEFT")==0 && fields>=2) + { + if(chk_type(arg1,lineno)) + set_left(arg1); + } + else if(strcmp(key,"RIGHT")==0 && fields>=2) + { + if(chk_type(arg1,lineno)) + set_right(arg1); + } + else if(strcmp(key,"REQ")==0 && fields>=3) + { + if(chk_cat(arg1,lineno) + chk_type(arg2,lineno) == 2) + set_obl(arg1,arg2); + } + else if(strcmp(key,"LINK")==0 && fields>=4) + { + if(chk_cat(arg1,lineno) + chk_cat(arg2,lineno) + chk_type(arg3,lineno) == 3) + set_connect(arg1,arg2,arg3); + } + // FLAG DECLARATION + else if(strcmp(key,"FLAG")==0 && fields>=2) + { + add_flag(arg1); + } + + else fprintf(stderr,"Invalid line %d. Ignored.\n", lineno); + } + +// compute_gt(); + + return true; + +} + +void Grammar::write(FILE* f) +{ + for(Cat i=1; i +#include +#include +#include + +#include "const.hh" +#include "thesymbols.hh" +#include "sgraph.hh" + + +class Link +{ + Role role; + FlagSet hflags; + FlagSet dflags; +}; + + +class Grammar +{ + + public: + + // enum CONSTR { SGL, OBL, LEFT, RIGHT, INIT, NONINIT, FIN, NONFIN }; + + Grammar() : types_sz(0), cats_sz(0), flags_sz(0) {} ; + + int types_sz; + int cats_sz; + int flags_sz; + + vector< vector< Roles > > connect; + RoleSet sgl; + vector< RoleSet > obl; + RoleSet left; + RoleSet right; + vector< RoleSet > lt; + vector< RoleSet > gt; + + + // vector< vector< vector< + vector< FlagSet > set; + vector< FlagSet > pass; + + bool read(FILE* f); + void write(FILE* f); + + void add_category(const char* s); + void add_type(const char* s); + void add_flag(const char* s); + + void set_sgl(Role r) { sgl.set(r); } + void set_obl(Cat c, Role r) { obl[c].set(r); } + void set_left(Role r) { left.set(r); } + void set_right(Role r) { right.set(r); } + void set_order(Role r, Role s) { lt[s].set(r); } + void set_connect(Cat c, Cat d, Role r) { connect[c][d].insert(r); } + void set_lt(Role r, Role s); + void compute_gt(); + + + bool check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role); + +}; + +inline bool Grammar::check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role) +{ + return + !hprop.forbidden[role] && + ( !right[role] || dir==1 ) && + ( !left[role] || dir==0 ) + ; +} + + +#endif diff --git a/src/dgp.old/main.cc b/src/dgp.old/main.cc new file mode 100644 index 0000000..b29171e --- /dev/null +++ b/src/dgp.old/main.cc @@ -0,0 +1,121 @@ + +/** + * Package: UAM Text Tools + * Component: dgp (dg parser) + * Version: 1.0 + * Author: Tomasz Obrebski + */ + +#include "global.hh" +#include "mgraph.hh" +#include "sgraph.hh" +#include "grammar.hh" +#include "dgp0.hh" +#include "../common/common.h" +#include "cmdline.h" + +#define MAXSEGMENTS 500 + +char segment[MAXSEGMENTS][MAXLINE]; +int segcount=0; +char seg_mnode[MAXSEGMENTS]; +char grammarfile[255]; + + +Grammar grammar; +MGraph mgraph; +SGraph sgraph; + +FILE* grammarf; +FILE* debugf=stdout; +unsigned int info=0U; + +void output(); + +main(int argc, char* argv[]) +{ + gengetopt_args_info args; + + if(cmdline_parser(argc,argv,&args) != 0) + exit(1); + + process_config_files(&args,argv[0]); + process_common_options(&args,argv[0]); + + if(!args.grammar_given) + fprintf(stderr,"dgp: no grammar given\n"); + + expand_path(args.grammar_arg,grammarfile); + + if(!(grammarf=fopen(grammarfile,"r"))) + fprintf(stderr,"dgp: grammar file not found: %s.\n", grammarfile), exit(1); + + if(args.debug_given) debug=true; + + for(char* c=args.info_arg; *c!='\0' ; ++c) + switch(*c) + { + case 'h': info|=SGraph::HEADS; break; + case 'd': info|=SGraph::DEPS; break; + case 's': info|=SGraph::SETS; break; + case 'c': info|=SGraph::CONSTRAINTS; break; + } + + grammar.read(grammarf); + fclose(grammarf); + + mgraph.clear(); + sgraph.clear(); + + char line[1000]; + while (fgets(line, MAXLINE+1, inputf)) + { + line[strlen(line)-1] = '\0'; + strcpy(segment[segcount],line); + + char segtype[80]; + + seg_mnode[segcount] = process_seg(line, args) ? mgraph.add_node(line) : -1; + + segcount++; + + getfield(line,"3",segtype); + if(strcmp(segtype,"EOS")==0) + { + dgp0(); // parametry!!! MGraph, SGraph, Grammar + output(); + + mgraph.clear(); + sgraph.clear(); + segcount=0; + } + // if(args.interactive_flag) { fflush(outputf); fflush(failedf); } + } + + fclose(inputf); + fclose(outputf); + cmdline_parser_free(&args); + exit(0); +} + +void output() +{ + for(int si=0; si=0) + { + MNode& m=mgraph.nodes[seg_mnode[si]]; + for(vector::iterator s=m.snodes.begin(); s!=m.snodes.end(); ++s) + { + fputs(segment[si],outputf); + sgraph.print_node(outputf, *s, info); + fputc('\n',outputf); + } + } + else + { + fputs(segment[si],outputf); + fputc('\n',outputf); + } + } +} diff --git a/src/dgp.old/mgraph.cc b/src/dgp.old/mgraph.cc new file mode 100644 index 0000000..adc9d41 --- /dev/null +++ b/src/dgp.old/mgraph.cc @@ -0,0 +1,54 @@ + +#include "mgraph.hh" +#include "thesymbols.hh" +#include "const.hh" + +#include + +int MGraph::add_node(char* seg) +{ + nodes[n].clear(); + + char field1[80], field3[80], descr[256], gph[256]; + char* cat; + + getfield(seg,"1",field1); + nodes[n].pos=atoi(field1); + + getfield(seg,"3",field3); + if(!getfield(seg,"lem",descr)) strcpy(descr,"?,?"); + + cat=descr; + while(*cat!=',' && *cat ) ++cat; + if(*cat) ++cat; + +// Cat::add(cat); + if(Cat::index(cat)>0) + nodes[n].cat=cat; + else + nodes[n].cat="NULL"; + + nodes[n].pred.clear(); + + char* tok; + int previd; + + if(!getfield(seg,"gph",gph)) + { + fprintf(stderr,"No gph field. Aborting (sorry).\n"); + exit(1); + } + + char* ids=strtok(gph,":"); + if(n!=atoi(ids)){fprintf(stderr,"Invalid node id in line ?. Program aborted.\n"); exit(1); } + + char *preds; + while(preds=strtok(NULL,",")) + { + previd=atoi(preds); + nodes[n].pred.push_back(&nodes[previd]); + } + + return n++; +} + diff --git a/src/dgp.old/mgraph.hh b/src/dgp.old/mgraph.hh new file mode 100644 index 0000000..373eac2 --- /dev/null +++ b/src/dgp.old/mgraph.hh @@ -0,0 +1,34 @@ +#ifndef _MGRAPH_HH +#define _MGRAPH_HH + +#include + +#include "const.hh" +#include "thesymbols.hh" +#include "../common/common.h" + +class MNode +{ +public: + + char type[MAXFORMLEN]; + Cat cat; + int pos; + vector pred; + vector snodes; + + void clear() { snodes.clear(); }; +}; + +class MGraph +{ + public: + + MNode nodes[MAXNODES]; + int n; + + void clear() { n=0; }; + int add_node(char* seg); +}; + +#endif diff --git a/src/dgp.old/sgraph.cc b/src/dgp.old/sgraph.cc new file mode 100644 index 0000000..e8d50d5 --- /dev/null +++ b/src/dgp.old/sgraph.cc @@ -0,0 +1,165 @@ +#include "global.hh" +#include "sgraph.hh" +#include "mgraph.hh" +#include "grammar.hh" +#include "const.hh" +#include + + +int SGraph::add_base_snode(MNode* mn) +{ + int nodeind=n; + SNode &node=nodes[n]; + + node.clear(); + + node.mnode=mn; + + for(vector::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm) + for(vector::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps) + if(nodes[*ps].in_LH) + { + node.LV.set(*ps); + if(nodes[*ps].saturated()) node.LV |= nodes[*ps].LH; + } + + mn->snodes.push_back(nodeind); + ++n; + + node.in_LH=true; + + return nodeind; +} + + +void SGraph::update_left(int headind, int depind) +{ + SNode &head=nodes[headind], &dep=nodes[depind]; + + if(dep.saturated()) head.LV |= dep.LV, head.LD |= dep.LD; +} + + +void SGraph::update_right(int headind, int depind) +{ + SNode &head=nodes[headind], &dep=nodes[depind]; + + dep.LH.set(headind); + if(head.saturated()) + dep.LH |= head.LH; +} + + +int SGraph::clone(int ancind, NodeProp newprop) +{ + int newind = n++; + SNode &newnode=nodes[newind]; + SNode &ancnode = nodes[ancind]; + + newnode.clear(); + newnode.prop=newprop; + newnode.mnode=ancnode.mnode; + newnode.mnode->snodes.push_back(newind); + return newind; +} + + +//------------------------------------------------------------------------- +//------------------------------------------------------------------------- + + +int SGraph::print_node(FILE* f, int n, unsigned int info) +{ + char buf[1000]; + sprint_node(buf,n,info); + fputs(buf,f); +} + +int SGraph::sprint_node(char* buf, int nodeind, unsigned int info) +{ + char* buf0=buf; + char descr[256]; + char nodeinfo[16]; + + SNode &node=nodes[nodeind]; + + buf+=sprintf(buf," dgp:%d",nodeind); + buf+=sprintf(buf, saturated(nodeind) ? ";s" : ";u"); + + bool cont=false; + if (info&HEADS) + { + buf+=sprintf(buf,";"); + for(vector::iterator h=node.heads.begin(); h!=node.heads.end(); ++h) + { + if(cont) buf+=sprintf(buf,","); else cont=true; + buf+=sprintf(buf,"++%s-%d/%d",h->role.str(),h->dst,h->anc); + } + } + + if (info&DEPS) + { + buf+=sprintf(buf,";"); + for(vector::iterator d=node.deps.begin(); d!=node.deps.end(); ++d) + { + // if(! nodes[d->dst].saturated()) continue; // NIE DRUKUJ NIENASYCONYCH PODRZEDNIKOW + if(cont) buf+=sprintf(buf,","); else cont=true; + buf+=sprintf(buf,"--%s-%d/%d",d->role.str(),d->dst,d->anc); + } + } + + if (info&SETS) + { + int ord=0; + buf+=sprintf(buf,";{"); + for(vector::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm) + for(vector::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps) + buf+=sprintf(buf, ord++ ? ",%d" : "%d", *ps); + buf+=sprintf(buf,"};{"); + ord=0;for(int j=0; j<=n; ++j) if(node.LV[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j); + buf+=sprintf(buf,"};{"); + ord=0;for(int j=0; j<=n; ++j) if(node.LH[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j); + buf+=sprintf(buf,"};{"); + ord=0;for(int j=0; j<=n; ++j) if(node.LD[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j); + buf+=sprintf(buf,"}"); + } + + if (info&CONSTRAINTS)// buf+=sprint_node_constraints(buf,n); + { + buf+=sprintf(buf,";"); + int cont=0; + for(Role i=1; i<=Role::count(); ++i) + if(node.prop.forbidden[i]) buf+=sprintf(buf,"%s!%s",(cont++)?",":"",i.str()); + for(Role i=1; i<=Role::count(); ++i) + if(node.prop.required[i]) buf+=sprintf(buf,"%s&%s",(cont++)?",":"",i.str()); + } + +// buf+=sprintf(buf,"\n"); + + return buf-buf0; +} + + +int SGraph::sprint_node_debug(char* buf, const char* pref, int n) +{ + char *buf0 = buf; + buf+=sprintf(buf,"#%s",pref); + buf+=sprint_node(buf,n,HEADS|DEPS|SETS|CONSTRAINTS); + buf+=sprintf(buf,"\n"); + return buf-buf0; +} + +int SGraph::print_node_debug(FILE* f, const char* pref, int n) +{ + char buf[1000]; + sprint_node_debug(buf,pref,n); + fputs(buf,f); +} + +void SGraph::print_arc(FILE* f, int left, int right, Role role, int dir) // 0 - left, 1 - right +{ + fprintf(f,"# %s:%s.%02d %s %s.%02d\n", + role.str(),nodes[left].mnode->type,left, + dir ? "-->" : "<--", + nodes[right].mnode->type,right); +} diff --git a/src/dgp.old/sgraph.hh b/src/dgp.old/sgraph.hh new file mode 100644 index 0000000..1c04e39 --- /dev/null +++ b/src/dgp.old/sgraph.hh @@ -0,0 +1,108 @@ +#ifndef _SGRAPH_HH +#define _SGRAPH_HH + +#include + +#include +#include +#include + +#include "const.hh" +#include "thesymbols.hh" + + +class MNode; + + +struct Arc +{ + int dst; + Role role; + int anc; + + Arc(int d, Role r, int a) : dst(d), role(r), anc(a) {}; + }; + + +struct NodeProp +{ + bitset required; + bitset forbidden; + + bool operator==(const NodeProp& p) + { return required==p.required && forbidden==p.forbidden; } + + void clear() + { required.reset(), forbidden.reset(); } + +}; + + +struct SNode +{ + + MNode* mnode; + + NodeProp prop; + + bitset LV; + bitset LH; + bitset LD; + bool in_LH; + + vector heads; + vector deps; + + void clear() { prop.clear(), LV.reset(), LD.reset(), LH.reset(), heads.clear(), deps.clear(); } + bool saturated() { return prop.required.none(); } +}; + + + +class SGraph +{ +public: + + SNode nodes[MAXNODES]; + int n; // number of nodes + + enum Output { HEADS=1, DEPS=2, SETS=4, CONSTRAINTS=8 }; + + SGraph() : n(0) {} + + void clear() { n=0; } + + int add_base_snode(MNode* mn); + int clone(int ancind, NodeProp newprop); + void update_left(int headind, int depind); + void update_right(int headind, int depind); + + bool visible(int left, int right); + bool saturated(int node); + + //-------------------------------------------------------------------- + + void read(FILE* f); + void write(FILE* f, list nodelist, unsigned int info); + + int sprint_node(char* buf, int n, unsigned int info); + int print_node(FILE* f, int n, unsigned int info); + int sprint_node_debug(char* buf, const char* pref, int n); + int print_node_debug(FILE* f, const char* pref, int n); + + void print_arc(FILE* f, int left, int right, Role role, int dir); // 0 - left, 1 - right + +}; + + +inline bool SGraph::visible(int left, int right) +{ + return nodes[right].LV[left]; +} + +inline bool SGraph::saturated(int node) +{ + return nodes[node].saturated(); +} + +#endif diff --git a/src/dgp.old/symbol.cc b/src/dgp.old/symbol.cc new file mode 100644 index 0000000..a10c241 --- /dev/null +++ b/src/dgp.old/symbol.cc @@ -0,0 +1,39 @@ +#include "symbol.hh" + +// CLASS symbols + +//int Symbols::_no_of_spaces=0; + +Symbols::~Symbols() +{ + while(!table.empty()) + { + free((void*)table.back()); + table.pop_back(); + } +} + +void Symbols::load(const char* filename) +{ + ifstream f(filename); + char s[100]; + while(f) + { + f >> s >> ws; + if(strlen(s)) add(s); + } +} + +void Symbols::add(const char* sym) +{ + if(hash.count(sym)==0) + { + char* symdup=strdup(sym); + hash[symdup]=table.size(); + table.push_back(symdup); + } +} + + +//template +//Symbols Symbol::defs; diff --git a/src/dgp.old/symbol.hh b/src/dgp.old/symbol.hh new file mode 100644 index 0000000..2a70bc2 --- /dev/null +++ b/src/dgp.old/symbol.hh @@ -0,0 +1,143 @@ +#ifndef _SYMBOL_HH +#define _SYMBOL_HH + +#include +//#include +#include +#include +#include +#include +#include + +using namespace std; + +using __gnu_cxx::hash_map; +using __gnu_cxx::hash; + + +// Key comparison for the cstr_hash hash table +struct eqstr +{ + bool operator()(const char * s, const char* t) const + { return strcmp(s,t)==0; } +}; + + +// Hash table for storing symbols + +typedef hash_map,eqstr> cstr_hash; + +// Symbol table. Provides access to symbols through their index or name. + +class Symbols +{ + public: + + Symbols() { add("NULL"); }; + ~Symbols(); + + void load(const char* filename); + + int operator[](const char* s) { return hash[s]; }; + + const char* operator[](int i) { return table[i]; }; + + void add(const char* c); + + int count() { return table.size(); }; + + private: + + std::vector table; + cstr_hash hash; + +}; + +////////////////////////////////////////////////////////////////////// + +/// Symbol class template. +/** The template argument determines the symbol space. + Each space is created with symbol "NULL" with indexed 0 already in. +*/ + +template +class Symbol +{ + public: + + /// Load the contents of the symbol table from file. + static void define(const char *filename) + { defs.load(filename); } + + /// Add symbol s. + /** The string is duplicated. + */ + static Symbol add(const char* s) { defs.add(s); } + + /// Number of symbols. + static int count() { return defs.count(); }; + + /// First symbol. + static int first() { return 1; } + + /// Last symbol. + static int last() { return defs.count()+1; } + + /// Last symbol. + static int index(const char* s) { return defs[s]; } + + /// Just for tests. + static void print(); + + /// 0-argument constructor, default value is 0 ("NULL"). + Symbol() : val(0) {}; + + /// Constructing a symbol from its index. + /** No check is performed. + */ + + Symbol(int v) : val(v) {}; + + /// Constructing a symbol from its name (string to Symbol conversion). + /** If s is not a symbol name, the value of 0 ("NULL") is assigned. + */ + + Symbol(const char * s) : val(defs[s]) {}; + + /// Symbol to char* conversion. If symbol is invalid, NULL is returned. + const char* str() const { return (val>=0 && val s=1; s; s++ ) ... + s=0; while(++s) ... + */ + (operator int)() const { return val; }; + + Symbol operator++() {val++; return *this;} + + // bool operator<(Symbol& s) { return val < s.val; } + + + private: + static Symbols defs; + int val; +}; + +template +void Symbol::print() +{ + for(Symbol i=0; i +Symbols Symbol::defs; + +template +bool operator<(const Symbol& s, const Symbol& t) +{ + return (int)s < (int)t; +} + +#endif diff --git a/src/dgp.old/thesymbols.hh b/src/dgp.old/thesymbols.hh new file mode 100644 index 0000000..b90f997 --- /dev/null +++ b/src/dgp.old/thesymbols.hh @@ -0,0 +1,29 @@ +#ifndef __THESYMBOLS__HH +#define __THESYMBOLS__HH + +#include "symbol.hh" +#include "const.hh" + +#include +#include +#include + +typedef Symbol<1> Cat; + +typedef Symbol<2> Role; +typedef list RoleList; +typedef list::iterator RoleListIter; +typedef bitset RoleSet; +typedef set Roles; +typedef Roles::iterator RolesIter; + +typedef Symbol<3> Constr; +typedef list ConstrList; +typedef list::iterator ConstrListIter; + +typedef Symbol<4> Rel; + +typedef Symbol<5> Flag; +typedef bitset FlagSet; + +#endif diff --git a/src/dgp/tre b/src/dgp.old/tre similarity index 100% rename from src/dgp/tre rename to src/dgp.old/tre diff --git a/src/dgp.old/uttcommon.c b/src/dgp.old/uttcommon.c new file mode 100644 index 0000000..4f5773a --- /dev/null +++ b/src/dgp.old/uttcommon.c @@ -0,0 +1,2 @@ +#include "uttcommon.h" + diff --git a/src/dgp.old/uttcommon.h b/src/dgp.old/uttcommon.h new file mode 100644 index 0000000..490f964 --- /dev/null +++ b/src/dgp.old/uttcommon.h @@ -0,0 +1,146 @@ +#ifndef __COMMON_H +#define __COMMON_H + +#include + +/************************************************** + * Stale dotyczace wejscia/wyjscia + */ + +#define MAXLINE 1024 + +#define EMPTYFORM '*' +#define INFIELD_SEP ':' +#define MAXAUX 16 +#define FIELD_SEP " \t\n" + + +/***************************************************************/ +/* problems with casing */ +/* sprawdzenie wielkosci liter */ +/* wartość zwracana: */ +/* 0 - wszystkie małe litery, 1 - pierwsza wielka, reszta male */ +/* 2 - wszystkie wielkie, 3 - inne */ +/***************************************************************/ +inline int casing(char* s) +{ + int ret = isupper(*s) ? 1 : 0; + while(*++s != '\0') + { + if(isupper(*s)) + { + if(ret==1) ret=2; + else if(ret==0) ret=3; + } + else + { + if(ret==2) ret=3; + } + } + return ret; +} + +// +inline void tolowers(char* s, char* d) +{ + *d=tolower(*s); + while(*s != '\0') * ++d = tolower(* ++s); +} + + +// przepisuje s do d +// nadajac wielkość liter zgodnie z wartością casing +// casing - wartość zwracana przez casing() +// jeśli casing==3 przepisuje bez zmian (za mało informacji) +inline void restorecasing(char *s, char *d, int casing) +{ + switch(casing) + { + case 0: + case 3: + *d=*s; + while(*s != '\0') * ++d = * ++s; + break; + case 1: + *d=toupper(*s); + while(*s != '\0') * ++d = * ++s; + break; + case 2: + *d=toupper(*s); + while(*s != '\0') * ++d = toupper(* ++s); + break; + } +} + + +/**************************************************/ +/* +parameters: + -seg - segment + -name - field name + +val - field contents +return value: + 1 if specified field exists, 0 otherwise +*/ + +inline int getfield(char* seg, const char* pref, char* val) +{ + char* p=seg; + + while(isspace(*p)) ++p; + + pos: + if(isdigit(*p) or *p=='*') while(!isspace(*p)) ++p; + else goto type; + + while(isspace(*p)) ++p; + + len: + if(isdigit(*p) or *p=='*') while(!isspace(*p)) ++p; + else goto type; + + while(isspace(*p)) ++p; + + type: + while(isspace(*p)) ++p; while(!isspace(*p)) ++p; + + while(isspace(*p)) ++p; + + form: + while(isspace(*p)) ++p; while(!isspace(*p)) ++p; + + annotation: + do p=strstr(p,pref); while(p!=NULL && *(p-1)!=' ' && *(p-1)!='\t'); + + if(p==NULL) return 0; + else + { + p+=strlen(pref); + int len=strcspn(p,FIELD_SEP "\n\r\f\0"); + strncpy(val,p,len); + val[len]='\0'; + return 1; + } +} + + +/* +parameters: + +seg - segment + -pref - prefix of the new field + -val - contents of the new field +return value: + 1 - success, 0 - fail (limit on segment length exceeded) +*/ +inline int addfield(char *seg, const char *pref, const char *val) + // zalozenie, ze seg konczy sie znakiem \n +{ + if(strlen(seg)+strlen(pref)+strlen(val) >= MAXLINE) return 0; // bezpieczniej, ale wolniej + + int seglen=strlen(seg); + sprintf(seg+(seglen-1)," %s%s\n",pref,val); + return 1; +} + + +#endif diff --git a/src/dgp/Makefile b/src/dgp/Makefile index 4c8e1d3..3ed9a02 100644 --- a/src/dgp/Makefile +++ b/src/dgp/Makefile @@ -1,19 +1,18 @@ + include ../../config.mak + SHELL = /bin/sh LIB_PATH=../../lib COMMON_PATH=../common CMDLINE_FILE='"../dgp/cmdline.h"' + #vpath %.o . -ifeq ($(BUILD_STATIC), yes) - LDFLAGS += -static -endif +CXXFLAGS = -g -static -CXXFLAGS += -O2 - -sources = main.cc grammar.cc symbol.cc mgraph.cc sgraph.cc dgp0.cc cmdline.cc \ +sources = main.cc grammar.cc symbol.cc mgraph.cc sgraph.cc dgp1.cc cmdline.cc \ $(COMMON_PATH)/common.cc global.cc bin = dgp @@ -22,51 +21,45 @@ bin = dgp objs = $(sources:%.cc=%.o) ${bin}: ${objs} - $(CXX) $(CXXFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) -o $@ ${objs} $(LDFLAGS) + ${CXX} ${CXXFLAGS} -D _CMDLINE_FILE=$(CMDLINE_FILE) -o $@ ${objs} include $(sources:.cc=.d) %.o: %.cc - $(CXX) -D _CMDLINE_FILE=$(CMDLINE_FILE) -c ${CXXFLAGS} -o $@ $< + ${CXX} -D _CMDLINE_FILE=$(CMDLINE_FILE) -c ${CXXFLAGS} -o $@ $< %.d: %.cc $(CC) -MM $(CPPFLAGS) -D _CMDLINE_FILE=$(CMDLINE_FILE) $< > $@.$$$$; \ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ rm -f $@.$$$$ -# stare: -# cmdline.cc cmdline.h : cmdline.ggo -# gengetopt --c-extension=cc -i cmdline.ggo -# nowe + cmdline.cc cmdline.h: cmdline.ggo - $(GENGETOPT) -i cmdline.ggo --c-extension=cc --conf-parser + gengetopt -i cmdline.ggo --c-extension=cc --conf-parser cmdline.ggo: cmdline_dgp.ggo ../common/cmdline_common.ggo cat cmdline_dgp.ggo ../common/cmdline_common.ggo > cmdline.ggo -# endnowe + +.PHONY: clean clean: - rm ${bin} ${objs} cmdline.cc cmdline.h - rm -rf *.d + rm -f ${bin} ${objs} cmdline.* + rm -f *.d +.PHONY: prof prof: dgp gprof dgp ~/tmp/dgp-pl/gmon.out > dgp.prof + .PHONY: install install: ifdef BIN_DIR install -m 0755 dgp $(BIN_DIR) - install -m 0755 dgc $(BIN_DIR) - install -m 0755 canonize $(BIN_DIR) - install -m 0755 tre $(BIN_DIR) endif .PHONY: uninstall uninstall: ifdef BIN_DIR rm $(BIN_DIR)/dgp - rm $(BIN_DIR)/dgc - rm $(BIN_DIR)/canonize - rm $(BIN_DIR)/tre endif diff --git a/src/dgp/boubble.cc b/src/dgp/boubble.cc new file mode 100644 index 0000000..49ca220 --- /dev/null +++ b/src/dgp/boubble.cc @@ -0,0 +1,21 @@ + +#include "boubble.hh" + + + + + +Boubble mktestboubble() +{ + + Role::add("conj"); + Role::add("ccmpl"); + + list ul,dl; + dl.push_back(Role("conj")); + dl.push_back(Role("ccmpl")); + + + BoubbleAction act; + Boubble* b0 = new Boubble(ul,dl, act); +} diff --git a/src/dgp/boubble.hh b/src/dgp/boubble.hh new file mode 100644 index 0000000..7a306c0 --- /dev/null +++ b/src/dgp/boubble.hh @@ -0,0 +1,223 @@ +#ifndef _BOUBBLE_HH_ +#define _BOUBBLE_HH_ + +#include +#include +#include + +#include "thesymbols.hh" + + +enum Dir {UP=0,DOWN=1,AT_TARGET=2}; + +// class BoubbleAction +// { +// public: +// void apply() {}; +// private: + +// }; + + + +class Boubble +{ +public: + Boubble(list u, list d, LongRel l, int s=-1); + Boubble(const char* pathstr, const char* l, int s=-1); + + Dir dir(); + LongRel rel(); + int src(); + void src(int s); + + Role next(); + + Boubble* step(Role r, Dir d); + + bool is_at_target(); + + bool operator==(Boubble const& b) const; + bool operator!=(Boubble const& b) const; + bool operator<(Boubble const& b) const; + + void as_cstr(char* s); + friend std::ostream& operator<<(std::ostream&, const Boubble&); + +private: + + int _src; + list _upath; + list _dpath; + LongRel _rel; + +}; + +//---------------------------------------------------------------------------------------------------- + +inline +Boubble::Boubble(list u, list d, LongRel l, int s) : _upath(u), _dpath(d), _rel(l), _src(s) {} + +//---------------------------------------------------------------------------------------------------- + +inline +Boubble::Boubble(const char* pathstr, const char* l, int s) +{ + Dir dir = UP; + const char* p = pathstr; + while(*p) + { + if(*p=='^') { dir = DOWN; p++; } + else if(isalpha(*p)) + { + char buf[80]; + sscanf(p,"%[a-zA-Z0-9_]",buf); + dir == UP ? _upath.push_back(Role(buf)) : _dpath.push_back(Role(buf)); + p += strlen(buf); + } + else + p++; + } + + _rel = LongRel(l); + _src = s; +} + +//---------------------------------------------------------------------------------------------------- + +inline +Dir Boubble::dir() +{ + if(!_upath.empty()) + return UP; + else if(!_dpath.empty()) + return DOWN; + else return AT_TARGET; +} + +//---------------------------------------------------------------------------------------------------- + +inline +LongRel Boubble::rel() +{ return _rel; } + +//---------------------------------------------------------------------------------------------------- + +inline +int Boubble::src() +{ return _src; } + +//---------------------------------------------------------------------------------------------------- + +inline +void Boubble::src(int s) +{ _src=s; } + +//---------------------------------------------------------------------------------------------------- + +inline +Role Boubble::next() +{ + if(!_upath.empty()) + return _upath.front(); + else if(!_dpath.empty()) + return _dpath.front(); + else return Role("NULL"); +} + +//---------------------------------------------------------------------------------------------------- + +inline +Boubble* Boubble::step(Role r, Dir d) +{ + if(d==UP && !_upath.empty() && _upath.front() == r) + { + Boubble* newboubble = new Boubble(_upath,_dpath,_rel,_src); + newboubble->_upath.pop_front(); + return newboubble; + } + + if(d==DOWN && _upath.empty() && !_dpath.empty()) + { + Boubble* newboubble = new Boubble(_upath,_dpath,_rel,_src); + newboubble->_dpath.pop_front(); + return newboubble; + } + return NULL; +} + +//---------------------------------------------------------------------------------------------------- + +inline +bool Boubble::is_at_target() +{ return _upath.empty() && _dpath.empty(); } + +//---------------------------------------------------------------------------------------------------- + +inline +bool Boubble::operator==(Boubble const& b) const +{ + return _src==b._src && _upath==b._upath && _dpath==b._dpath && _rel==b._rel; +} + +//---------------------------------------------------------------------------------------------------- + +inline +bool Boubble::operator!=(Boubble const& b) const +{ + return !(*this==b); +} + +//---------------------------------------------------------------------------------------------------- + +inline +bool Boubble::operator<(Boubble const& b) const +{ + if(_src < b._src) return true; + if(_rel < b._rel) return true; + if(this < &b) return true; + return false; +} + +//---------------------------------------------------------------------------------------------------- + +inline +std::ostream& operator<<(std::ostream& o, const Boubble& b) +{ + o << "["; + + o << b._src << "|"; + + bool cont=false; + for(list::const_iterator i = b._upath.begin(); i != b._upath.end(); ++i) + { + if(cont) o << ','; + o << i->str(); + cont = true; + } + o << '^'; + cont=false; + for(list::const_iterator i = b._dpath.begin(); i != b._dpath.end(); ++i) + { + if(cont) o << ','; + o << i->str(); + cont = true; + } + o << ':'; + o << b._rel.str(); + o << "]"; +} + +//---------------------------------------------------------------------------------------------------- + +inline +void Boubble::as_cstr(char* s) +{ + stringstream oss; + oss << *this; + strcpy(s,oss.str().c_str()); +} + +//==================================================================================================== + +#endif diff --git a/src/dgp/const.hh b/src/dgp/const.hh index 7077f81..bab8833 100644 --- a/src/dgp/const.hh +++ b/src/dgp/const.hh @@ -2,10 +2,12 @@ #define CONST_HH #define MAXTYPES 32 +#define MAXCATS 4096 #define MAXFLAGS 64 -#define MAXNODES 1024 +#define MAXPROPS 16 +#define MAXNODES 2048 #define MAXCONSTRS 32 -#define MAXLINE 256 +#define MAXLINE 512 #define MAXFORMLEN 64 #define MAXDESCRLEN 80 #define FIELDSEP " \n\t" diff --git a/src/dgp/dgp0.cc b/src/dgp/dgp0.cc index 24faeb7..0155b33 100644 --- a/src/dgp/dgp0.cc +++ b/src/dgp/dgp0.cc @@ -5,8 +5,6 @@ extern Grammar grammar; extern MGraph mgraph; extern SGraph sgraph; -SNode* snodes; - extern bool debug; list nodelist; @@ -15,26 +13,26 @@ list::iterator processed; void set_initial_constraints(int node) { - snodes[node].prop.forbidden.reset(); - snodes[node].prop.required=grammar.obl[snodes[node].mnode->cat]; + sgraph[node].prop.forbidden.reset(); + sgraph[node].prop.required=grammar.is_obl(mgraph[sgraph[node].mnode].cat); } bool changing_constraints(int head, Role role) { - return grammar.sgl[role] || snodes[head].prop.required[role]; + return grammar.is_sgl(role) || sgraph[head].prop.required[role]; } void apply_constraints(int head, Role role) { - if(grammar.sgl[role]) snodes[head].prop.forbidden.set(role); - snodes[head].prop.required.reset(role); + if(grammar.is_sgl(role)) sgraph[head].prop.forbidden.set(role); + sgraph[head].prop.required.reset(role); } NodeProp compute_prop_left(NodeProp headprop, Role role) { NodeProp ret=headprop; - if(grammar.sgl[role]) ret.forbidden.set(role); + if(grammar.is_sgl(role)) ret.forbidden.set(role); ret.required.reset(role); return ret; } @@ -43,7 +41,7 @@ NodeProp compute_prop_right(NodeProp headprop, Role role) { NodeProp ret=headprop; - if(grammar.sgl[role]) ret.forbidden.set(role); + if(grammar.is_sgl(role)) ret.forbidden.set(role); ret.required.reset(role); return ret; } @@ -51,101 +49,90 @@ NodeProp compute_prop_right(NodeProp headprop, Role role) int get_node(MNode& mnode, NodeProp p, bitset& newheadLH, bitset& newheadLV) { for(vector::iterator ps=mnode.snodes.begin(); ps!=mnode.snodes.end(); ++ps) - if(snodes[*ps].prop==p && snodes[*ps].LH==newheadLH && snodes[*ps].LV==newheadLV) + if(sgraph[*ps].prop==p && sgraph[*ps].LH==newheadLH && sgraph[*ps].LV==newheadLV) return *ps; return -1; } void connect_left(list::iterator h, list::iterator d, Role r) { - NodeProp &oldheadprop = snodes[*h].prop; - NodeProp newheadprop; - bitset newheadLV; - bitset newheadLH; - bitset newheadLD; - - newheadprop=compute_prop_left(oldheadprop,r); + NodeProp &oldheadprop = sgraph[*h].prop; + NodeProp newheadprop = compute_prop_left(oldheadprop,r); int newheadind; if(oldheadprop==newheadprop) newheadind = *h; else { - newheadLH = snodes[*h].LH; - newheadLV = snodes[*d].LV; - newheadLD = snodes[*h].LD; + bitset newheadLH = sgraph[*h].LH; + bitset newheadLV = sgraph[*d].LV; + bitset newheadLD = sgraph[*h].LD; - newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV); + newheadind = get_node(mgraph[sgraph[*h].mnode], newheadprop, newheadLH, newheadLV); if( newheadind < 0 ) { newheadind = sgraph.clone(*h,newheadprop); list::iterator nextit=h; ++nextit; nodelist.insert(nextit,newheadind); - snodes[newheadind].LH=newheadLH; - snodes[newheadind].in_LH=true; - snodes[newheadind].LV.reset(); - snodes[newheadind].LD = newheadLD; + sgraph[newheadind].LH=newheadLH; + sgraph[newheadind].LD = newheadLD; + sgraph[newheadind].in_LH=true; + sgraph[newheadind].LV.reset(); if(debug) sgraph.print_node_debug(stderr," C ",newheadind); } else - snodes[newheadind].LD |= newheadLD; // TYLKO DLA LD + sgraph[newheadind].LD |= newheadLD; // TYLKO DLA LD } - snodes[newheadind].deps.push_back(Arc(*d,r,*h)); + sgraph[newheadind].deps.push_back(Arc(*d,r,*h)); - if(snodes[*d].saturated()) snodes[newheadind].LV |= snodes[*d].LV; - snodes[newheadind].LD.set(*d); - if(snodes[*d].saturated()) snodes[newheadind].LD |= snodes[*d].LD; - - if(debug) - sgraph.print_arc(stderr,*d,newheadind,r,0), sgraph.print_node_debug(stderr," U ",newheadind); -} + if(sgraph[*d].saturated()) sgraph[newheadind].LV |= sgraph[*d].LV; + sgraph[newheadind].LD.set(*d); + if(sgraph[*d].saturated()) sgraph[newheadind].LD |= sgraph[*d].LD; + + if(debug) sgraph.print_arc(stderr,*d,newheadind,r,0), sgraph.print_node_debug(stderr," U ",newheadind); +} void connect_right(list::iterator h, list::iterator d, Role r) { - NodeProp &oldheadprop = snodes[*h].prop; - NodeProp newheadprop; - bitset newheadLV; - bitset newheadLH; - bitset newheadLD; + NodeProp &oldheadprop = sgraph[*h].prop; + NodeProp newheadprop = compute_prop_right(oldheadprop,r); int newheadind; - newheadprop = compute_prop_right(oldheadprop,r); if(oldheadprop==newheadprop) newheadind = *h; else { - newheadLH = snodes[*h].LH; - newheadLV = snodes[*h].LV; - newheadLD = snodes[*h].LD; + bitset newheadLH = sgraph[*h].LH; + bitset newheadLV = sgraph[*h].LV; + bitset newheadLD = sgraph[*h].LD; - newheadind = get_node(*(snodes[*h].mnode), newheadprop, newheadLH, newheadLV); + newheadind = get_node(mgraph[sgraph[*h].mnode], newheadprop, newheadLH, newheadLV); if( newheadind < 0 ) { newheadind = sgraph.clone(*h,newheadprop); - snodes[newheadind].LH=newheadLH; - snodes[newheadind].in_LH=false; - snodes[newheadind].LV=newheadLV; - snodes[newheadind].LD=newheadLD; list::iterator nextit=h; ++nextit; nodelist.insert(nextit,newheadind); + sgraph[newheadind].LH=newheadLH; + sgraph[newheadind].LD=newheadLD; + sgraph[newheadind].in_LH=false; + sgraph[newheadind].LV=newheadLV; if(debug) sgraph.print_node_debug(stderr," C ",newheadind); } else - snodes[newheadind].LD |= newheadLD; // TYLKO DLA LD + sgraph[newheadind].LD |= newheadLD; // TYLKO DLA LD } - snodes[*d].heads.push_back(Arc(newheadind,r,*h)); + sgraph[*d].heads.push_back(Arc(newheadind,r,*h)); - snodes[*d].LH.set(newheadind); + sgraph[*d].LH.set(newheadind); - if(snodes[newheadind].saturated()) snodes[*d].LH |= snodes[newheadind].LH; + if(sgraph[newheadind].saturated()) sgraph[*d].LH |= sgraph[newheadind].LH; - if(debug) - sgraph.print_arc(stderr,newheadind,*d,r,1), sgraph.print_node_debug(stderr," U ",*d); + if(debug) sgraph.print_arc(stderr,newheadind,*d,r,1), sgraph.print_node_debug(stderr," U ",*d); } @@ -155,9 +142,9 @@ void try_connect_dependents(list::iterator j) for(list::iterator i(j); i!=nodelist.begin(); --i) if(sgraph.visible(*i,*j) && sgraph.saturated(*i)) { - Roles& ji_roles = grammar.connect[snodes[*j].mnode->cat][snodes[*i].mnode->cat]; + Roles& ji_roles = grammar.connectable( mgraph[sgraph[*j].mnode].cat, mgraph[sgraph[*i].mnode].cat ); for(RolesIter r=ji_roles.begin(); r!=ji_roles.end();++r) - if(grammar.check_constr(snodes[*j].prop,snodes[*i].prop,0,*r)) + if(grammar.check_constr(sgraph[*j].prop,sgraph[*i].prop,0,*r)) connect_left(j,i,*r); } } @@ -168,9 +155,9 @@ void try_connect_heads(list::iterator j) for(list::iterator i(j); i!=nodelist.begin(); --i) if(sgraph.visible(*i,*j)) { - Roles& ij_roles = grammar.connect[snodes[*i].mnode->cat][snodes[*j].mnode->cat]; + Roles& ij_roles = grammar.connectable( mgraph[sgraph[*i].mnode].cat, mgraph[sgraph[*j].mnode].cat ); for(RolesIter r=ij_roles.begin(); r!=ij_roles.end();++r) - if(grammar.check_constr(snodes[*i].prop,snodes[*j].prop,1,*r)) + if(grammar.check_constr(sgraph[*i].prop,sgraph[*j].prop,1,*r)) connect_right(i,j,*r); } } @@ -181,25 +168,25 @@ void reverse_links() list::iterator i = nodelist.begin(); for(++i; i!=nodelist.end(); ++i) { - for(vector::iterator da=sgraph.nodes[*i].deps.begin()--; da!=sgraph.nodes[*i].deps.end(); ++da) - sgraph.nodes[da->dst].heads.push_back(Arc(*i,da->role,da->anc)); - for(vector::iterator ha=sgraph.nodes[*i].heads.begin(); ha!=sgraph.nodes[*i].heads.end(); ++ha) - sgraph.nodes[ha->dst].deps.push_back(Arc(*i,ha->role,ha->anc)); + for(vector::iterator da=sgraph[*i].deps.begin()--; da!=sgraph[*i].deps.end(); ++da) + sgraph[da->dst].heads.push_back(Arc(*i,da->role,da->anc)); + for(vector::iterator ha=sgraph[*i].heads.begin(); ha!=sgraph[*i].heads.end(); ++ha) + sgraph[ha->dst].deps.push_back(Arc(*i,ha->role,ha->anc)); } } void dgp0() { - snodes=sgraph.nodes; nodelist.clear(); nodelist.push_back(0); // BOS processed=nodelist.begin(); - for(int m=0; m nodelist; +list::iterator processed; + +//==================================================================================================== + +void set_initial_constraints(int node) +{ + sgraph[node].prop.forbidden.reset(); + sgraph[node].prop.required=grammar.is_obl(sgraph.cat(node)); + sgraph[node].prop.attached.reset(); + sgraph[node].prop.flags=grammar.initial_flags(sgraph.cat(node)); +} + +//---------------------------------------------------------------------------------------------------- + +bool changing_constraints(int head, Role role) +{ + return grammar.is_sgl(role) || sgraph[head].prop.required[role]; +} + +//==================================================================================================== + +NodeProp compute_head_prop(NodeProp headprop, const Link& link, list bs, FlagSet& depflags) +{ + NodeProp ret=headprop; + + if(grammar.is_sgl(link.role)) + { + ret.forbidden.set(link.role); + ret.attached.set(link.role); + } + ret.required.reset(link.role); + + ret.required |= (grammar.constr_include(link.role) & ~ret.attached); + ret.forbidden |= grammar.constr_exclude(link.role); + + ret.boubbles=bs; + ret.flags |= ( depflags & grammar.pass_flags(link.role) ); + + if(link.props[Prop("INIT")]) ret.init_attached=true; + if(link.props[Prop("FIN")]) ret.fin_attached=true; + + return ret; +} + +//---------------------------------------------------------------------------------------------------- + +NodeProp compute_dep_prop(NodeProp depprop, const Link& link, list bs) +{ + NodeProp ret=depprop; + ret.boubbles=bs; + return ret; +} + +//==================================================================================================== + +int find_existing_node(int mnodeind, NodeProp p, bitset& newheadLH, bitset& newheadLV) +{ + MNode& mnode = mgraph[mnodeind]; + for(vector::iterator ps=mnode.snodes.begin(); ps!=mnode.snodes.end(); ++ps) + if(sgraph[*ps].prop==p && sgraph[*ps].LH==newheadLH && sgraph[*ps].LV==newheadLV) return *ps; + return -1; +} + +//==================================================================================================== + +list receive_boubbles(int node, Role role, Dir dir) +{ + list ret; + for(list::iterator b = sgraph[node].prop.boubbles.begin(); b!=sgraph[node].prop.boubbles.end(); b++) + { + Boubble* new_boubble = (*b)->step(role,dir); + if(new_boubble) + ret.push_back(new_boubble); + } + return ret; +} + +//---------------------------------------------------------------------------------------------------- + +list collect_head_boubbles(int head, int dep, Role role) +{ + list new_boubbles = grammar.trigger_boubbles(sgraph.cat(dep),role,UP); + + for(list::iterator b = new_boubbles.begin(); b != new_boubbles.end(); b++) + (*b)->src(dep); + + list received_boubbles = receive_boubbles(dep,role,UP); + + new_boubbles.insert(new_boubbles.begin(),received_boubbles.begin(),received_boubbles.end()); + + return new_boubbles; +} + +//---------------------------------------------------------------------------------------------------- + +list collect_dep_boubbles(int head, int dep, Role role) +{ + list new_boubbles = grammar.trigger_boubbles(sgraph.cat(head), role, DOWN); + + for(list::iterator b = new_boubbles.begin(); b != new_boubbles.end(); b++) + (*b)->src(head); + + list received_boubbles = receive_boubbles(head,role,DOWN); + + new_boubbles.insert(new_boubbles.begin(),received_boubbles.begin(),received_boubbles.end()); + + return new_boubbles; +} + +//==================================================================================================== + +int create_new_head_node_left(list::iterator h, NodeProp& newheadprop, bitset& newheadLH, bitset& newheadLD, bitset& newheadLV) +{ + int newheadind = sgraph.clone(*h,newheadprop); + list::iterator nextit=h; ++nextit; + nodelist.insert(nextit,newheadind); + sgraph[newheadind].LH=newheadLH; + sgraph[newheadind].LD = newheadLD; + sgraph[newheadind].in_LH=true; + sgraph[newheadind].LV.reset(); + + if(debug) sgraph.print_node_debug(stderr,"C ",newheadind,*h); + + return newheadind; +} + +int create_new_dep_node_left(list::iterator d, NodeProp& prop, bitset& LH, bitset& LD, bitset& LV) +{ + int newind = sgraph.clone(*d,prop); + list::iterator nextit=d; ++nextit; + nodelist.insert(nextit,newind); + sgraph[newind].LH.reset(); + sgraph[newind].LD=LD; + sgraph[newind].in_LH=false; //??????? + sgraph[newind].LV.reset(); + + if(debug) sgraph.print_node_debug(stderr,"C ",newind,*d); + + return newind; +} + +int create_new_head_node_right(list::iterator h, NodeProp& newheadprop, bitset& newheadLH, bitset& newheadLD, bitset& newheadLV) +{ + int newheadind = sgraph.clone(*h,newheadprop); + list::iterator nextit=h; ++nextit; + nodelist.insert(nextit,newheadind); + sgraph[newheadind].LH=newheadLH; + sgraph[newheadind].LD=newheadLD; + sgraph[newheadind].in_LH=false; + sgraph[newheadind].LV=newheadLV; + + if(debug) sgraph.print_node_debug(stderr,"C ",newheadind,*h); + + return newheadind; +} + +int create_new_dep_node_right(list::iterator d, NodeProp& prop, bitset& LH, bitset& LD, bitset& LV) +{ + int newind = sgraph.clone(*d,prop); + list::iterator nextit=d; ++nextit; + nodelist.insert(nextit,newind); + sgraph[newind].LH=LH; + sgraph[newind].LD=LD; + sgraph[newind].in_LH=true; //??????? + sgraph[newind].LV.reset(); + + if(debug) sgraph.print_node_debug(stderr,"C ",newind,*d); + + return newind; +} + +//==================================================================================================== + +void connect_left(list::iterator h, list::iterator d, const Link& l, list& new_head_boubbles, list& new_dep_boubbles) +{ + + NodeProp &oldheadprop = sgraph[*h].prop; + NodeProp &olddepprop = sgraph[*d].prop; + + NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles,olddepprop.flags); + + int newheadind; + if(oldheadprop==newheadprop) + newheadind = *h; + else + { + bitset newheadLH = sgraph[*h].LH; + bitset newheadLV = sgraph[*d].LV; + bitset newheadLD = sgraph[*h].LD; + + newheadind = find_existing_node(sgraph[*h].mnode, newheadprop, newheadLH, newheadLV); + if( newheadind >= 0 ) + sgraph[newheadind].LD |= newheadLD; + else + newheadind = create_new_head_node_left(h,newheadprop,newheadLH,newheadLD,newheadLV); + } + + + + NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles); + + int newdepind; + + if(olddepprop==newdepprop) + newdepind = *d; + else + { + bitset newdepLH = sgraph[*d].LH; + bitset newdepLV = sgraph[*d].LV; + bitset newdepLD = sgraph[*d].LD; + + newdepind = find_existing_node(sgraph[*d].mnode, newdepprop, newdepLH, newdepLV); + if( newdepind >= 0 ) + sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD + else + newdepind = create_new_dep_node_left(d,newdepprop,newdepLH,newdepLD,newdepLV); + } + + sgraph[newheadind].deps.push_back(Arc(newdepind,l.role,*h,*d)); + + if(sgraph[*d].saturated()) sgraph[newheadind].LV |= sgraph[*d].LV; + + sgraph[newheadind].LD.set(*d); + if(sgraph[*d].saturated()) sgraph[newheadind].LD |= sgraph[*d].LD; + + if(debug) sgraph.print_arc(stderr,newheadind,*d,l.role,0); + if(debug) sgraph.print_node_debug(stderr,"U ",newheadind,*h); + if(debug) sgraph.print_node_debug(stderr,"U ",*d,*d); +} + +//---------------------------------------------------------------------------------------------------- + +void connect_right(list::iterator h, list::iterator d, const Link& l, list& new_head_boubbles, list& new_dep_boubbles) +{ + NodeProp &oldheadprop = sgraph[*h].prop; + + NodeProp newheadprop = compute_head_prop(oldheadprop,l,new_head_boubbles, sgraph[*d].prop.flags); + + int newheadind; + + if(oldheadprop==newheadprop) + newheadind = *h; + else + { + bitset newheadLH = sgraph[*h].LH; + bitset newheadLV = sgraph[*h].LV; + bitset newheadLD = sgraph[*h].LD; + + newheadind = find_existing_node(sgraph[*h].mnode, newheadprop, newheadLH, newheadLV); + if( newheadind >= 0 ) + sgraph[newheadind].LD |= newheadLD; // TYLKO DLA LD + else + newheadind = create_new_head_node_right(h,newheadprop,newheadLH,newheadLD,newheadLV); + } + + NodeProp &olddepprop = sgraph[*d].prop; + NodeProp newdepprop = compute_dep_prop(olddepprop,l,new_dep_boubbles); + + int newdepind; + + if(olddepprop==newdepprop) + newdepind = *d; + else + { + bitset newdepLH = sgraph[*d].LH; + bitset newdepLV = sgraph[*d].LV; + bitset newdepLD = sgraph[*d].LD; + + newdepind = find_existing_node(sgraph[*d].mnode, newdepprop, newdepLH, newdepLV); + if( newdepind >= 0 ) + sgraph[newdepind].LD |= newdepLD; // TYLKO DLA LD + else + newdepind = create_new_dep_node_right(d,newdepprop,newdepLH,newdepLD,newdepLV); + } + + + + + sgraph[newdepind].heads.push_back(Arc(newheadind,l.role,*h,*d)); + + sgraph[newdepind].LH.set(newheadind); + + // sgraph[*d].prop.merge_boubbles(new_dep_boubbles); + + if(sgraph[newheadind].saturated()) sgraph[newdepind].LH |= sgraph[newheadind].LH; + + if(debug) sgraph.print_arc(stderr,newheadind,newdepind,l.role,1); + if(debug) sgraph.print_node_debug(stderr,"U ",newheadind,*h); + if(debug) sgraph.print_node_debug(stderr,"U ",newdepind,*d); + +} + +//==================================================================================================== + +bool check_boubbles_at_target(list boubbles, int node) +{ + for(list::iterator b = boubbles.begin(); b != boubbles.end(); b++) + if( (*b)->is_at_target() && !grammar.check_longrel(sgraph.cat((*b)->src()), sgraph.cat(node), (*b)->rel())) + return false; + return true; +} + +//==================================================================================================== + +void try_connect_dependents(list::iterator j) +{ + for(list::iterator i(j); i!=nodelist.begin(); --i) + if(sgraph.visible(*i,*j) && sgraph.saturated(*i)) + { + if(debug) {fprintf(stderr,"## %d <-- %d ... ",*i,*j); } + + list ji_links = grammar.connectable2( sgraph.cat(*j), sgraph.cat(*i), sgraph[*j].prop.flags, sgraph[*i].prop.flags); // ref do Roles!!! + list::iterator ri = ji_links.begin(); + if(ri == ji_links.end()) { if(debug) fprintf(stderr,"no roles\n"); } + else + { + for(; ri != ji_links.end(); ++ri ) + if(!grammar.check_constr2(sgraph[*j].prop,sgraph[*i].prop,0,**ri )) + { if(debug) fprintf(stderr,"constraints failed\n"); } + else + { + list new_head_boubbles = collect_head_boubbles(*j,*i,(*ri)->role); + list new_dep_boubbles = collect_dep_boubbles(*j,*i,(*ri)->role); + + if( !(check_boubbles_at_target(new_head_boubbles,*j) && check_boubbles_at_target(new_dep_boubbles,*i)) ) + { if(debug) fprintf(stderr,"boubbles failed\n"); } + else + { + if(debug) fprintf(stderr,"success\n"); + connect_left( j, i, **ri, new_head_boubbles, new_dep_boubbles); + } + } + } + } +} + + +//---------------------------------------------------------------------------------------------------- + +void try_connect_heads(list::iterator j) +{ + for(list::iterator i(j); i!=nodelist.begin(); --i) + if(sgraph.visible(*i,*j)) + { + if(debug) fprintf(stderr, "## %d --> %d ... ",*i,*j); + + list ij_links = grammar.connectable2( sgraph.cat(*i), sgraph.cat(*j), sgraph[*i].prop.flags, sgraph[*j].prop.flags ); + list::iterator ri = ij_links.begin(); + if(ri == ij_links.end()) { if(debug) fprintf(stderr,"no roles\n"); } + else + { + for(; ri != ij_links.end(); ++ri ) + if( !grammar.check_constr2( sgraph[*i].prop, sgraph[*j].prop, 1, **ri ) ) + { if(debug) fprintf(stderr,"constraints failed\n"); } + else + { + list new_head_boubbles = collect_head_boubbles(*i,*j,(*ri)->role); + list new_dep_boubbles = collect_dep_boubbles(*i,*j,(*ri)->role); + + if( !(check_boubbles_at_target(new_head_boubbles,*i) && check_boubbles_at_target(new_dep_boubbles,*j)) ) + { if(debug) fprintf(stderr,"boubbles failed\n"); } + else + { + if(debug) fprintf(stderr,"success\n"); + connect_right( i, j, **ri, new_head_boubbles, new_dep_boubbles ); + } + } + } + } +} + +//==================================================================================================== + +void reverse_links() +{ + list::iterator i = nodelist.begin(); + for(++i; i!=nodelist.end(); ++i) + { + for(vector::iterator da=sgraph[*i].deps.begin()--; da!=sgraph[*i].deps.end(); ++da) + sgraph[da->dst].heads.push_back(Arc(*i,da->role,da->headanc,da->depanc)); + for(vector::iterator ha=sgraph[*i].heads.begin(); ha!=sgraph[*i].heads.end(); ++ha) + sgraph[ha->dst].deps.push_back(Arc(*i,ha->role,ha->headanc,ha->depanc)); + } +} + +//==================================================================================================== + +void dgp1() +{ + + nodelist.clear(); + nodelist.push_back(0); // BOS + processed=nodelist.begin(); + + for(int m=0; m::iterator cursor=processed; + while(++cursor != nodelist.end()) + { + try_connect_dependents(cursor); + try_connect_heads(cursor); + processed=cursor; + } + + } + reverse_links(); +} diff --git a/src/dgp/dgp1.hh b/src/dgp/dgp1.hh new file mode 100644 index 0000000..234916b --- /dev/null +++ b/src/dgp/dgp1.hh @@ -0,0 +1,12 @@ +#ifndef _DGP0_HH +#define _DGP0_HH + +#include "grammar.hh" +#include "sgraph.hh" +#include "mgraph.hh" + +// API + +void dgp1(); + +#endif diff --git a/src/dgp/grammar.cc b/src/dgp/grammar.cc index 63e2882..bd81717 100644 --- a/src/dgp/grammar.cc +++ b/src/dgp/grammar.cc @@ -1,61 +1,102 @@ -#include +#include #include "grammar.hh" -bool (*constraint[MAXCONSTRS])(int head, int dep); +//bool (*constraint[MAXCONSTRS])(int head, int dep); +//==================================================================================================== +//inline !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +bool chk_type(const char* s) { return Role::index(s)>0; } // PRZENIEŚĆ DO Role +//---------------------------------------------------------------------------------------------------- +//inline !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +bool chk_long(const char* s) { return LongRel::index(s)>0; } // jw +//---------------------------------------------------------------------------------------------------- +//inline !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +bool chk_cat(const char* s) { return Cat::index(s)>0; } //jw +//---------------------------------------------------------------------------------------------------- +//inline !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +bool chk_flag(const char* s) { return Flag::index(s)>0; } //jw +//==================================================================================================== -int chk_type(const char* s, int lineno) // SIDE EFECTS! +void grammar_error(int lineno, string s="Grammar error.") { - if(Role::index(s)>0) return 1; - - fprintf(stderr,"%8d: Invalid type '%s'. Line ignored.\n",lineno,s); - return 0; + fprintf(stderr,"%8d: %s Line ignored.\n",lineno,s.c_str()); } -int chk_cat(const char* s, int lineno) -{ - if(Cat::index(s)>0) return 1; - - fprintf(stderr,"%8d: Invalid category '%s'. Line ignored.\n",lineno,s); - return 0; -} +//==================================================================================================== void Grammar::add_category(const char* s) { Cat::add(s); - if(Cat::count()>cats_sz) + + if(connect.size() <= Cat::count()) { - cats_sz += 16; - connect.resize(cats_sz); - for(int i=0; itypes_sz) - { - types_sz += 16; - lt.resize(types_sz); - gt.resize(types_sz); - } + + if(lt.size() <= Role::count()) lt.resize(Role::count()+RESIZE_DELTA); + if(gt.size() <= Role::count()) gt.resize(Role::count()+RESIZE_DELTA); + if(pass.size() <= Role::count()) pass.resize(Role::count()+RESIZE_DELTA); + if(include.size() <= Role::count()) include.resize(Role::count()+RESIZE_DELTA); + if(exclude.size() <= Role::count()) exclude.resize(Role::count()+RESIZE_DELTA); + for(int i=0; iflags_sz) - { - flags_sz += 16; - pass.resize(flags_sz); - } +//==================================================================================================== + +bool Grammar::contains_boubble(const list boubble_list, Boubble* bp) const +{ + for(list::const_iterator bi = boubble_list.begin(); bi != boubble_list.end(); bi++ ) + if(**bi == *bp) return true; + return false; } +//---------------------------------------------------------------------------------------------------- + +void Grammar::add_triggers(Cat src, Cat dest, LongRel l) +{ + for(list::const_iterator b=boubbles.begin(); b!=boubbles.end(); b++) + if((*b)->rel() == l) + { + list& boubble_list = ((*b)->dir()==UP) ? uptrigger[src][(*b)->next()] : dntrigger[src][(*b)->next()] ; + if(!contains_boubble(boubble_list,*b)) + boubble_list.push_back(*b); + } +} + +//==================================================================================================== void Grammar::set_lt(Role s, Role t) { @@ -84,77 +125,343 @@ void Grammar::compute_gt() } +void Grammar::compute_triggers() +{ + //init uptrigger array + uptrigger.resize(Cat::count()); + for(int i=0; i::const_iterator b=boubbles.begin(); b!=boubbles.end; b++) + // if(b->dir()==UP && ) +} + +//==================================================================================================== + +list Grammar::trigger_boubbles(Cat c, Role r, Dir d) +{ + list boubble_list = (d == UP) ? uptrigger[c][r] : dntrigger[c][r]; + list ret; + for(list::iterator b = boubble_list.begin(); b != boubble_list.end(); ++b) + ret.push_back((*b)->step(r,d)); + return ret; +} + +//==================================================================================================== + +Flag parse_flags(const char* s, const char* v) +{ + char buf[16][17]; + int n=sscanf(s,"%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]%[A-Z]%[+-]", + buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7],buf[8],buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15],buf[16]); + for(int i=2; i<=n; i+=2) + if(strcmp(buf[i-1],v)==0) + return Flag(buf[i-2]); + return Flag("NULL"); +} + + +PropSet parse_props(const char* s) +{ + PropSet ret; + char buf[8][17]; + int n=sscanf(s,"&%[A-Z]&%[A-Z]&%[A-Z]&%[A-Z]&%[A-Z]&%[A-Z]&%[A-Z]&%[A-Z]",buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7]); + for(int i=1; i<=n; i++) + ret.set(Prop(buf[i-1])); + return ret; + +} + + bool Grammar::read(FILE* f) { + + //>>> TU? + + Prop::add("INIT"); + Prop::add("FIN"); + + //<<< TU? + + int lineno=0; char line[MAXLINE]; // line has the structure: key [arg1 [arg2 [arg3]]] char key[MAXLINE]; char arg1[MAXLINE]; char arg2[MAXLINE]; char arg3[MAXLINE]; + char arg4[MAXLINE]; while(fgets(line,MAXLINE,f)) - { - lineno++; - int fields=sscanf(line,"%s %s %s %s",key,arg1,arg2,arg3); - - if(fields<1 || key[0]=='#') continue; // skip empty lines and comments - - if (strcmp(key,"CAT")==0 && fields>=2) { - add_category(arg1); - } - else if(strcmp(key,"ROLE")==0 && fields>=2) - { - add_type(arg1); - } - else if(strcmp(key,"SGL")==0 && fields>=2) - { - if(chk_type(arg1,lineno)) - set_sgl(arg1); - } - else if(strcmp(key,"LEFT")==0 && fields>=2) - { - if(chk_type(arg1,lineno)) - set_left(arg1); - } - else if(strcmp(key,"RIGHT")==0 && fields>=2) - { - if(chk_type(arg1,lineno)) - set_right(arg1); - } - else if(strcmp(key,"REQ")==0 && fields>=3) - { - if(chk_cat(arg1,lineno) + chk_type(arg2,lineno) == 2) - set_obl(arg1,arg2); - } - else if(strcmp(key,"LINK")==0 && fields>=4) - { - if(chk_cat(arg1,lineno) + chk_cat(arg2,lineno) + chk_type(arg3,lineno) == 3) - set_connect(arg1,arg2,arg3); - } - // FLAG DECLARATION - else if(strcmp(key,"FLAG")==0 && fields>=2) - { - add_flag(arg1); - } + lineno++; + int fields=sscanf(line,"%s %s %s %s %s",key,arg1,arg2,arg3,arg4); + + if(fields<1 || key[0]=='#') continue; // skip empty lines and comments - else fprintf(stderr,"Invalid line %d. Ignored.\n", lineno); - } + if(fields>1 && arg1[0] == '#') fields=1; + if(fields>2 && arg2[0] == '#') fields=2; + if(fields>3 && arg3[0] == '#') fields=3; + if(fields>4 && arg4[0] == '#') fields=4; + + if (strcmp(key,"CAT")==0 && fields==2) + { + add_category(arg1); + } + else if(strcmp(key,"ROLE")==0 && fields==2) + { + add_type(arg1); + } + else if(strcmp(key,"SGL")==0 && fields==2) + { + if(chk_type(arg1)) + set_sgl(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"LEFT")==0 && fields==2) + { + if(chk_type(arg1)) + set_left(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"RIGHT")==0 && fields==2) + { + if(chk_type(arg1)) + set_right(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"INITR")==0 && fields==2) + { + if(chk_type(arg1)) + set_init(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"FINR")==0 && fields==2) + { + if(chk_type(arg1)) + set_fin(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"INITF")==0 && fields==2) + { + if(chk_flag(arg1)) + set_initf(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"FINF")==0 && fields==2) + { + if(chk_flag(arg1)) + set_finf(arg1); + else + grammar_error(lineno); + } + else if(strcmp(key,"REQ")==0 && fields==3) + { + if( chk_cat(arg1) && chk_type(arg2) ) + set_obl(arg1,arg2); + else + grammar_error(lineno); + } + else if(strcmp(key,"CONSTRE")==0 && fields==3) + { + if( chk_type(arg1) && chk_type(arg2) ) + set_exclude(arg1,arg2); + else + grammar_error(lineno); + } + else if(strcmp(key,"CONSTRI")==0 && fields==3) + { + if( chk_type(arg1) && chk_type(arg2) ) + set_include(arg1,arg2); + else + grammar_error(lineno); + } + else if(strcmp(key,"LONG")==0 && fields ==3) + { + add_long(arg1,arg2); + } + else if(strcmp(key,"LINK")==0 && fields==4) + { + char cat1[MAXLINE],flags1[MAXLINE],cat2[MAXLINE],flags2[MAXLINE],type[MAXLINE],props[MAXLINE]; -// compute_gt(); + if(sscanf(arg1,"%[^;];%s",cat1,flags1)==1) *flags1='\0'; + if(sscanf(arg2,"%[^;];%s",cat2,flags2)==1) *flags2='\0'; + if(sscanf(arg3,"%[^&]%s",type,props)==1) *props='\0'; + + // printf("line=%s\n\tcat1=%s flags1=%s cat2=%s flags2=%s type=%s props=%s\n",line,cat1,flags1,cat2,flags2,type,props); + if( chk_cat(cat1) && chk_cat(cat2) && chk_type(type) ) + set_connect(cat1,parse_flags(flags1,"+"),parse_flags(flags1,"-"),cat2,parse_flags(flags2,"+"),parse_flags(flags2,"-"),type,parse_props(props)); + else if( chk_cat(cat1) && chk_cat(cat2) && chk_long(type) ) + { + set_longrel(cat1,cat2,type); + add_triggers(cat1,cat2,type); + } + else + grammar_error(lineno); + } + // else if(strcmp(key,"LINK")==0 && fields==5) + // { + // if( chk_cat(arg1) && chk_cat(arg2) && chk_type(arg4) ) + // set_connect(arg1,arg2,arg3,arg4); + // else + // grammar_error(lineno); + // } + // FLAG DECLARATION + else if(strcmp(key,"FLAG")==0 && fields==2) + { + add_flag(arg1); + } + else if(strcmp(key,"SET")==0 && fields==3) + { + if( chk_cat(arg1) && chk_flag(arg2) ) + set_set(arg1,arg2); + else + grammar_error(lineno); + } + else if(strcmp(key,"PASS")==0 && fields==3) + { + if( chk_type(arg1) && chk_flag(arg2) ) + set_pass(arg1,arg2); + else + grammar_error(lineno); + } + + else fprintf(stderr,"Statement not recognized in line %d. Ignored.\n", lineno); + } + + // compute_gt(); + return true; } + +void Grammar::write(ostream& os) +{ + for(Cat i=1; ihflagplus||l->hflagminus) + { + os << ";"; + if(l->hflagplus) os << (l->hflagplus).str() << "+"; + if(l->hflagminus) os << (l->hflagminus).str() << "-"; + } + os << "\t" << d.str(); + if(l->dflagplus||l->dflagminus) + { + os << ";"; + if(l->dflagplus) os << (l->dflagplus).str() << "+"; + if(l->dflagminus) os << (l->dflagminus).str() << "-"; + } + os << "\t" << (l->role).str(); + for(Prop p=0; pprops[p]) + os << "&" << p.str(); + os << endl; + } + + for(LongRel i=1; i::const_iterator b = boubbles.begin(); b != boubbles.end(); b++) + os << "BOUBBLE\t" << **b << endl; + + for(Cat c=1; c::const_iterator b=uptrigger[c][r].begin(); b!=uptrigger[c][r].end(); b++) + os << "TRIGGER-UP\t" << c.str() << "\t" << r.str() << "\t" << (*b)->rel().str() << endl; + + for(Cat c=1; c::const_iterator b=dntrigger[c][r].begin(); b!=dntrigger[c][r].end(); b++) + os << "TRIGGER-DN\t" << c.str() << "\t" << r.str() << "\t" << (*b)->rel().str() << endl; + + for(Flag i=1; i::const_iterator b=uptrigger[c][r].begin(); b!=uptrigger[c][r].end(); b++) + fprintf(f,"#TRIGGER\t%s\t%s\t%s\n",c.str(),r.str(),(*b)->rel().str()); + + for(Cat c=1; c::const_iterator b=dntrigger[c][r].begin(); b!=dntrigger[c][r].end(); b++) + fprintf(f,"#TRIGGER\t%s\t%s\t%s\n",c.str(),r.str(),(*b)->rel().str()); + for(Flag i=1; i PropSet; + +const PropSet EmptyPropSet = PropSet(); + +const FlagSet EmptyFlagSet = FlagSet(); + +//==================================================================================================== +// class Link +//==================================================================================================== + +struct Link { + Link(Role r, Flag dfplus="NULL", Flag dfminus="NULL") : role(r), dflagplus(dfplus), dflagminus(dfminus) { } + Link(Role r, PropSet ps=EmptyPropSet, Flag hfp="NULL", Flag hfm="NULL", Flag dfp="NULL", Flag dfm="NULL") + : role(r), props(ps), hflagplus(hfp), hflagminus(hfm), dflagplus(dfp), dflagminus(dfm) { } + //Link(Role r) : role(r), dflagplus("NULL") { } + Role role; - FlagSet hflags; - FlagSet dflags; + Flag hflagplus; + Flag hflagminus; + Flag dflagplus; + Flag dflagminus; + PropSet props; + + bool operator<(const Link& l) const + { + if(role < l.role) return true; + if(hflagplus < l.hflagplus) return true; + if(hflagminus < l.hflagminus) return true; + if(dflagplus < l.dflagplus) return true; + if(dflagminus < l.dflagminus) return true; + if(props.to_ulong() < l.props.to_ulong()) return true; + return false; + } + }; +typedef set Links; + +//==================================================================================================== +// class Grammar +//==================================================================================================== class Grammar { public: - // enum CONSTR { SGL, OBL, LEFT, RIGHT, INIT, NONINIT, FIN, NONFIN }; + static const int RESIZE_DELTA=16; - Grammar() : types_sz(0), cats_sz(0), flags_sz(0) {} ; + Grammar() {} ; + + Roles& connectable(Cat h, Cat d); + Roles connectable(Cat h, Cat d, FlagSet f, FlagSet df); + + list connectable2(Cat h, Cat d, FlagSet hfs, FlagSet dfs); + + bool check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role); + bool check_constr2(NodeProp& hprop, NodeProp& dprop, int dir, const Link& link); + + bool check_longrel(Cat hcat, Cat dcat, LongRel rel); + bool is_sgl(Role r); + RoleSet is_obl(Cat c); + + RoleSet& constr_include(Role r) { return include[r]; }; + RoleSet& constr_exclude(Role r) { return exclude[r]; }; - int types_sz; - int cats_sz; - int flags_sz; + FlagSet initial_flags(Cat c) { return set[c]; } + FlagSet pass_flags(Role r) { return pass[r]; } + + list trigger_boubbles(Cat c, Role r, Dir d); + + bool read(FILE* f); + void write(ostream& os); + void write(FILE* f); + +private: - vector< vector< Roles > > connect; RoleSet sgl; - vector< RoleSet > obl; + vector< RoleSet > obl; //[Cat] RoleSet left; RoleSet right; - vector< RoleSet > lt; - vector< RoleSet > gt; + RoleSet init; + RoleSet fin; + FlagSet initf; + FlagSet finf; + vector< RoleSet > lt; //[Role] + vector< RoleSet > gt; //[Role] - // vector< vector< vector< - vector< FlagSet > set; - vector< FlagSet > pass; + vector< FlagSet > set; //[Cat] + // vector< FlagSet > rset; //[Role] + vector< FlagSet > pass; //[Role] - bool read(FILE* f); - void write(FILE* f); + vector< vector< Roles > > connect; //[Cat][Cat] + + vector< vector< Links > > connect1; //[Cat][Cat] + + vector< RoleSet > include; //[Role] + vector< RoleSet > exclude; //[Role] + + vector< vector< LongRels > > longrel; //[Cat][Cat] + + list< Boubble* > boubbles; + + vector< vector< list > > uptrigger;//[Cat][Role] + vector< vector< list > > dntrigger;//[Cat][Role] void add_category(const char* s); void add_type(const char* s); - void add_flag(const char* s); + void add_flag(const char* s) { Flag::add(s); } + void add_long(const char* l, const char* p) { LongRel::add(l); boubbles.push_back( new Boubble(p,l) ); } + void add_triggers(Cat h, Cat d, LongRel l); - void set_sgl(Role r) { sgl.set(r); } - void set_obl(Cat c, Role r) { obl[c].set(r); } - void set_left(Role r) { left.set(r); } - void set_right(Role r) { right.set(r); } - void set_order(Role r, Role s) { lt[s].set(r); } - void set_connect(Cat c, Cat d, Role r) { connect[c][d].insert(r); } + void set_sgl(Role r) { sgl.set(r); } + void set_obl(Cat c, Role r) { obl[c].set(r); } + void set_left(Role r) { left.set(r); } + void set_right(Role r) { right.set(r); } + void set_init(Role r) { init.set(r); } + void set_fin(Role r) { fin.set(r); } + void set_initf(Flag f) { initf.set(f); } + void set_finf(Flag f) { finf.set(f); } + void set_order(Role r, Role s) { lt[s].set(r); } + + // void set_connect(Cat c, Cat d, Role r) { connect[c][d].insert(r); } + // void set_connect(Cat c, Cat d, Flag f, Role r) { connect1[c][d].insert(Link(r,f)); } + + void set_connect(Cat h, Flag hfp, Flag hfm, Cat d, Flag dfp, Flag dfm, Role r, PropSet ps ) { connect1[h][d].insert(Link(r,ps,hfp,hfm,dfp,dfm)); } + + void set_include(Role r, Role s) { include[r].set(s); } + void set_exclude(Role r, Role s) { exclude[r].set(s); } + void set_longrel(Cat c, Cat d, LongRel l){ longrel[c][d].insert(l); } + void set_set(Cat c, Flag f) { set[c].set(f); } + void set_pass(Role r, Flag f) { pass[r].set(f); } void set_lt(Role r, Role s); void compute_gt(); - - - bool check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role); + void compute_triggers(); + bool contains_boubble(const list boubble_list, Boubble* bp) const; }; -inline bool Grammar::check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role) +//---------------------------------------------------------------------------------------------------- + +inline +Roles& Grammar::connectable(Cat h, Cat d) +{ + return connect[h][d]; +} + +//---------------------------------------------------------------------------------------------------- + +inline +Roles Grammar::connectable(Cat h, Cat d, FlagSet hfs, FlagSet dfs) // ZBYT WOLNE!!!!!!!!!!!!!!!!!!!!!!!!!! (-> Roles&) +{ + Roles ret; + for(Links::const_iterator l = connect1[h][d].begin(); l != connect1[h][d].end(); l++) + if( (l->hflagplus==0 || hfs[l->hflagplus]) && (l->hflagminus==0 || !hfs[l->hflagminus]) ) + if( (l->dflagplus==0 || dfs[l->dflagplus]) && (l->dflagminus==0 || !dfs[l->dflagminus]) ) + ret.insert(l->role); + return ret; +} + +//---------------------------------------------------------------------------------------------------- + +inline +list Grammar::connectable2(Cat h, Cat d, FlagSet hfs, FlagSet dfs) // ZBYT WOLNE!!!!!!!!!!!!!!!!!!!!!!!!!! (-> Roles&) +{ + list ret; + for(Links::const_iterator l = connect1[h][d].begin(); l != connect1[h][d].end(); l++) + if( (l->hflagplus==0 || hfs[l->hflagplus]) && (l->hflagminus==0 || !hfs[l->hflagminus]) ) + if( (l->dflagplus==0 || dfs[l->dflagplus]) && (l->dflagminus==0 || !dfs[l->dflagminus]) ) + ret.push_back(&(*l)); + return ret; +} + +//---------------------------------------------------------------------------------------------------- + +inline +bool Grammar::check_constr(NodeProp& hprop, NodeProp& dprop, int dir, Role role) // dir: 0-left 1-right { return !hprop.forbidden[role] && - ( !right[role] || dir==1 ) && - ( !left[role] || dir==0 ) + ( dir==1 || !right[role] ) && + ( dir==0 || !left[role] ) && + ( dir==1 || (hprop.attached&init).none() ) && + ( dir==0 || (hprop.attached&fin).none() ) ; } +//---------------------------------------------------------------------------------------------------- + +inline +bool Grammar::check_constr2(NodeProp& hprop, NodeProp& dprop, int dir, const Link& link) // dir: 0-left 1-right +{ + return + !hprop.forbidden[link.role] && + ( dir==1 || !right[link.role] ) && + ( dir==0 || !left[link.role] ) && + ( dir!=0 || !hprop.init_attached ) && + ( dir!=1 || !hprop.fin_attached ) + ; +} + +//---------------------------------------------------------------------------------------------------- + +inline +bool Grammar::check_longrel(Cat hcat, Cat dcat, LongRel rel) +{ + return longrel[hcat][dcat].find(rel) != longrel[hcat][dcat].end(); +} + +//---------------------------------------------------------------------------------------------------- + +inline bool Grammar::is_sgl(Role r) +{ + return sgl[r]; +} + +//---------------------------------------------------------------------------------------------------- + +inline RoleSet Grammar::is_obl(Cat c) +{ + return obl[c]; +} + +//==================================================================================================== #endif diff --git a/src/dgp/main.cc b/src/dgp/main.cc index b29171e..e948591 100644 --- a/src/dgp/main.cc +++ b/src/dgp/main.cc @@ -10,7 +10,7 @@ #include "mgraph.hh" #include "sgraph.hh" #include "grammar.hh" -#include "dgp0.hh" +#include "dgp1.hh" #include "../common/common.h" #include "cmdline.h" @@ -24,7 +24,7 @@ char grammarfile[255]; Grammar grammar; MGraph mgraph; -SGraph sgraph; +SGraph sgraph(mgraph); FILE* grammarf; FILE* debugf=stdout; @@ -64,6 +64,13 @@ main(int argc, char* argv[]) grammar.read(grammarf); fclose(grammarf); + + + // grammar.write(cout); + // exit(0); + + + mgraph.clear(); sgraph.clear(); @@ -82,7 +89,7 @@ main(int argc, char* argv[]) getfield(line,"3",segtype); if(strcmp(segtype,"EOS")==0) { - dgp0(); // parametry!!! MGraph, SGraph, Grammar + dgp1(); // parametry!!! MGraph, SGraph, Grammar output(); mgraph.clear(); @@ -104,7 +111,7 @@ void output() { if(seg_mnode[si]>=0) { - MNode& m=mgraph.nodes[seg_mnode[si]]; + MNode& m=mgraph[seg_mnode[si]]; for(vector::iterator s=m.snodes.begin(); s!=m.snodes.end(); ++s) { fputs(segment[si],outputf); diff --git a/src/dgp/mgraph.cc b/src/dgp/mgraph.cc index adc9d41..025cb11 100644 --- a/src/dgp/mgraph.cc +++ b/src/dgp/mgraph.cc @@ -3,32 +3,36 @@ #include "thesymbols.hh" #include "const.hh" -#include +#include int MGraph::add_node(char* seg) { - nodes[n].clear(); + MNode newnode; + newnode.clear(); - char field1[80], field3[80], descr[256], gph[256]; + char field1[80], field3[80], field4[256], descr[256], gph[256]; char* cat; getfield(seg,"1",field1); - nodes[n].pos=atoi(field1); + newnode.pos=atoi(field1); getfield(seg,"3",field3); + + getfield(seg,"4",field4); + strcpy(newnode.form,field4); + if(!getfield(seg,"lem",descr)) strcpy(descr,"?,?"); cat=descr; while(*cat!=',' && *cat ) ++cat; if(*cat) ++cat; -// Cat::add(cat); if(Cat::index(cat)>0) - nodes[n].cat=cat; + newnode.cat=cat; else - nodes[n].cat="NULL"; + newnode.cat="NULL"; - nodes[n].pred.clear(); + newnode.pred.clear(); char* tok; int previd; @@ -40,15 +44,16 @@ int MGraph::add_node(char* seg) } char* ids=strtok(gph,":"); - if(n!=atoi(ids)){fprintf(stderr,"Invalid node id in line ?. Program aborted.\n"); exit(1); } + if(size() != atoi(ids)) {fprintf(stderr,"Invalid node id in line ?. Program aborted.\n"); exit(1); } char *preds; while(preds=strtok(NULL,",")) { previd=atoi(preds); - nodes[n].pred.push_back(&nodes[previd]); + newnode.pred.push_back(previd); } - return n++; + nodes.push_back(newnode); + return nodes.size()-1; } diff --git a/src/dgp/mgraph.hh b/src/dgp/mgraph.hh index 373eac2..0723e24 100644 --- a/src/dgp/mgraph.hh +++ b/src/dgp/mgraph.hh @@ -7,28 +7,35 @@ #include "thesymbols.hh" #include "../common/common.h" + +using namespace std; + class MNode { public: - char type[MAXFORMLEN]; - Cat cat; int pos; - vector pred; + char form[256]; + Cat cat; + vector pred; vector snodes; void clear() { snodes.clear(); }; }; - + class MGraph { - public: +public: - MNode nodes[MAXNODES]; - int n; - - void clear() { n=0; }; + void clear() { nodes.clear(); } + int size() { return nodes.size(); } int add_node(char* seg); + MNode& operator[](int i) { return nodes[i]; } + +private: + + vector nodes; + }; #endif diff --git a/src/dgp/sgraph.cc b/src/dgp/sgraph.cc index e8d50d5..5b356b4 100644 --- a/src/dgp/sgraph.cc +++ b/src/dgp/sgraph.cc @@ -1,42 +1,42 @@ #include "global.hh" #include "sgraph.hh" -#include "mgraph.hh" #include "grammar.hh" #include "const.hh" -#include +#include +#include +extern MGraph mgraph; -int SGraph::add_base_snode(MNode* mn) +//==================================================================================================== + +int SGraph::add_base_snode(int mnodeind) { - int nodeind=n; - SNode &node=nodes[n]; + SNode& newnode = makenewnode(); - node.clear(); + newnode.mnode=mnodeind; - node.mnode=mn; - - for(vector::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm) - for(vector::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps) + for(vector::iterator pm=mgraph[newnode.mnode].pred.begin(); pm!=mgraph[newnode.mnode].pred.end(); ++pm) + for(vector::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps) if(nodes[*ps].in_LH) { - node.LV.set(*ps); - if(nodes[*ps].saturated()) node.LV |= nodes[*ps].LH; + newnode.LV.set(*ps); + if(nodes[*ps].saturated()) newnode.LV |= nodes[*ps].LH; } - mn->snodes.push_back(nodeind); - ++n; + mgraph[newnode.mnode].snodes.push_back(lastnodeind()); - node.in_LH=true; + newnode.in_LH=true; - return nodeind; + return lastnodeind(); } +//==================================================================================================== void SGraph::update_left(int headind, int depind) { SNode &head=nodes[headind], &dep=nodes[depind]; - if(dep.saturated()) head.LV |= dep.LV, head.LD |= dep.LD; + if(dep.saturated()) head.LV |= dep.LV, head.LD |= dep.LD; } @@ -45,82 +45,98 @@ void SGraph::update_right(int headind, int depind) SNode &head=nodes[headind], &dep=nodes[depind]; dep.LH.set(headind); - if(head.saturated()) - dep.LH |= head.LH; -} + if(head.saturated()) dep.LH |= head.LH; +} +//==================================================================================================== int SGraph::clone(int ancind, NodeProp newprop) { - int newind = n++; - SNode &newnode=nodes[newind]; + SNode &newnode=makenewnode(); SNode &ancnode = nodes[ancind]; - newnode.clear(); + + newnode.prop=newprop; newnode.mnode=ancnode.mnode; - newnode.mnode->snodes.push_back(newind); - return newind; + mgraph[newnode.mnode].snodes.push_back(lastnodeind()); + + return lastnodeind(); } - -//------------------------------------------------------------------------- -//------------------------------------------------------------------------- - +//==================================================================================================== int SGraph::print_node(FILE* f, int n, unsigned int info) { - char buf[1000]; - sprint_node(buf,n,info); + char buf[50000]; + sprint_node(buf,n,-1,info); fputs(buf,f); } -int SGraph::sprint_node(char* buf, int nodeind, unsigned int info) +//---------------------------------------------------------------------------------------------------- + +int SGraph::print_node_debug(FILE* f, const char* pref, int n, int anc) +{ + char buf[50000]; + sprint_node_debug(buf,pref,n,anc); + fputs(buf,f); +} + +//---------------------------------------------------------------------------------------------------- + +void SGraph::print_arc(FILE* f, int head, int dep, Role role, int dir) // 0 - left, 1 - right +{ + if(dir==0) + fprintf(f,"#A %s:%d <-- %d\n", role.str(), dep, head); + else + fprintf(f,"#A %s:%d --> %d\n", role.str(), head, dep); +} + +//==================================================================================================== + +int SGraph::sprint_node(char* buf, int nodeind, int anc, unsigned int info) { char* buf0=buf; - char descr[256]; - char nodeinfo[16]; SNode &node=nodes[nodeind]; buf+=sprintf(buf," dgp:%d",nodeind); + if(anc>=0) buf+=sprintf(buf,"(%d)",anc); buf+=sprintf(buf, saturated(nodeind) ? ";s" : ";u"); - bool cont=false; - if (info&HEADS) - { + if (info&HEADS || info&DEPS) buf+=sprintf(buf,";"); + + bool cont=false; + + if (info&HEADS) for(vector::iterator h=node.heads.begin(); h!=node.heads.end(); ++h) { if(cont) buf+=sprintf(buf,","); else cont=true; - buf+=sprintf(buf,"++%s-%d/%d",h->role.str(),h->dst,h->anc); + buf+=sprintf(buf,"++%s-%d(%d~%d)",h->role.str(),h->dst,h->headanc,h->depanc); } - } - + if (info&DEPS) - { - buf+=sprintf(buf,";"); for(vector::iterator d=node.deps.begin(); d!=node.deps.end(); ++d) { // if(! nodes[d->dst].saturated()) continue; // NIE DRUKUJ NIENASYCONYCH PODRZEDNIKOW if(cont) buf+=sprintf(buf,","); else cont=true; - buf+=sprintf(buf,"--%s-%d/%d",d->role.str(),d->dst,d->anc); + buf+=sprintf(buf,"--%s-%d(%d~%d)",d->role.str(),d->dst,d->headanc,d->depanc); } - } if (info&SETS) { int ord=0; buf+=sprintf(buf,";{"); - for(vector::iterator pm=node.mnode->pred.begin(); pm!=node.mnode->pred.end(); ++pm) - for(vector::iterator ps=(*pm)->snodes.begin(); ps!=(*pm)->snodes.end(); ++ps) + for(vector::iterator pm=mgraph[node.mnode].pred.begin(); pm!=mgraph[node.mnode].pred.end(); ++pm) + for(vector::iterator ps=mgraph[*pm].snodes.begin(); ps!=mgraph[*pm].snodes.end(); ++ps) buf+=sprintf(buf, ord++ ? ",%d" : "%d", *ps); buf+=sprintf(buf,"};{"); - ord=0;for(int j=0; j<=n; ++j) if(node.LV[j]) buf+=sprintf(buf, ord++ ? ",%d" : "%d", j); + ord=0;for(int j=0; j",(cont++)?",":"",i.str()); + if(node.prop.init_attached) + buf+=sprintf(buf,""); + if(node.prop.fin_attached) + buf+=sprintf(buf,""); + + stringstream oss; + for(list::iterator b = node.prop.boubbles.begin(); b != node.prop.boubbles.end(); b++) + oss << (cont++ ? "," : "") << **b; + buf+=sprintf(buf,oss.str().c_str()); } // buf+=sprintf(buf,"\n"); @@ -140,26 +169,15 @@ int SGraph::sprint_node(char* buf, int nodeind, unsigned int info) } -int SGraph::sprint_node_debug(char* buf, const char* pref, int n) +int SGraph::sprint_node_debug(char* buf, const char* pref, int n, int anc) { char *buf0 = buf; buf+=sprintf(buf,"#%s",pref); - buf+=sprint_node(buf,n,HEADS|DEPS|SETS|CONSTRAINTS); + + buf+=sprintf(buf,"%-16s",form(n)); + + buf+=sprint_node(buf,n,anc,HEADS|DEPS|SETS|CONSTRAINTS); buf+=sprintf(buf,"\n"); return buf-buf0; } -int SGraph::print_node_debug(FILE* f, const char* pref, int n) -{ - char buf[1000]; - sprint_node_debug(buf,pref,n); - fputs(buf,f); -} - -void SGraph::print_arc(FILE* f, int left, int right, Role role, int dir) // 0 - left, 1 - right -{ - fprintf(f,"# %s:%s.%02d %s %s.%02d\n", - role.str(),nodes[left].mnode->type,left, - dir ? "-->" : "<--", - nodes[right].mnode->type,right); -} diff --git a/src/dgp/sgraph.hh b/src/dgp/sgraph.hh index 1c04e39..e829b06 100644 --- a/src/dgp/sgraph.hh +++ b/src/dgp/sgraph.hh @@ -8,40 +8,170 @@ #include #include "const.hh" +#include "mgraph.hh" #include "thesymbols.hh" +#include "boubble.hh" -class MNode; - +using namespace std; +//==================================================================================================== +// CLASS Arc +//==================================================================================================== struct Arc { int dst; Role role; - int anc; + int headanc; + int depanc; - Arc(int d, Role r, int a) : dst(d), role(r), anc(a) {}; - }; + Arc(int d, Role r, int ha, int da) : dst(d), role(r), headanc(ha), depanc(da) {}; +}; +//==================================================================================================== +// CLASS NodeProp +//==================================================================================================== struct NodeProp { - bitset required; - bitset forbidden; + NodeProp(); + NodeProp(const NodeProp& p); + ~NodeProp(); - bool operator==(const NodeProp& p) - { return required==p.required && forbidden==p.forbidden; } + bool operator==(const NodeProp& p); + NodeProp& operator=(const NodeProp& p); - void clear() - { required.reset(), forbidden.reset(); } + void clear_boubbles(); + void merge_boubbles(list new_boubbles); + void copy(const NodeProp& p); + void clear(); + + RoleSet required; + RoleSet forbidden; + RoleSet attached; + + bool init_attached; + bool fin_attached; + + FlagSet flags; + + list boubbles; }; +//---------------------------------------------------------------------------------------------------- + +inline +bool NodeProp::operator==(const NodeProp& p) +{ + if(required != p.required) return false; + if(forbidden != p.forbidden) return false; + if(attached != p.attached) return false; + if(flags != p.flags) return false; + if(init_attached != p.init_attached) return false; + if(fin_attached != p.fin_attached) return false; + + list::const_iterator b1 = p.boubbles.begin(); + for(list::const_iterator b = boubbles.begin(); b != boubbles.end(); b++) + { + if(b1 == p.boubbles.end()) + return false; + if(!(**b == **b1)) + return false; + } + if(b1 != p.boubbles.end()) + return false; + + return true; +} + +//---------------------------------------------------------------------------------------------------- + +inline +void NodeProp::clear_boubbles() +{ + for(list::iterator b = boubbles.begin(); b!=boubbles.end(); b++) + delete *b; + boubbles.clear(); +} + +//---------------------------------------------------------------------------------------------------- + +inline +void NodeProp::merge_boubbles(list new_boubbles) +{ + boubbles.merge(new_boubbles); +} + +//---------------------------------------------------------------------------------------------------- + +inline +void NodeProp::copy(const NodeProp& p) +{ + required=p.required; + forbidden=p.forbidden; + attached=p.attached; + flags=p.flags; + init_attached=p.init_attached; + fin_attached=p.fin_attached; + for(list::const_iterator b = p.boubbles.begin(); b!=p.boubbles.end(); b++) + boubbles.push_back(new Boubble(**b)); +} + +//---------------------------------------------------------------------------------------------------- + +inline +NodeProp::~NodeProp() +{ + clear_boubbles(); +} +//---------------------------------------------------------------------------------------------------- + +inline +NodeProp::NodeProp() +{ + clear(); +} + +//---------------------------------------------------------------------------------------------------- + +inline +NodeProp::NodeProp(const NodeProp& p) +{ + copy(p); +} + +//---------------------------------------------------------------------------------------------------- + +inline +NodeProp& NodeProp::operator=(const NodeProp& p) +{ + clear(); + copy(p); + return *this; +} + +//---------------------------------------------------------------------------------------------------- + +inline +void NodeProp::clear() +{ + required.reset(); + forbidden.reset(); + attached.reset(); + init_attached=false; + fin_attached=false; + clear_boubbles(); +} + +//==================================================================================================== +// CLASS SNode +//==================================================================================================== struct SNode { - MNode* mnode; + int mnode; NodeProp prop; @@ -53,56 +183,80 @@ struct SNode vector heads; vector deps; - void clear() { prop.clear(), LV.reset(), LD.reset(), LH.reset(), heads.clear(), deps.clear(); } - bool saturated() { return prop.required.none(); } + void clear(); + bool saturated(); }; +//---------------------------------------------------------------------------------------------------- +inline +void SNode::clear() +{ prop.clear(), LV.reset(), LD.reset(), LH.reset(), heads.clear(), deps.clear(); } +//---------------------------------------------------------------------------------------------------- +inline +bool SNode::saturated() +{ return prop.required.none(); } +//==================================================================================================== +// SGraph CLASS +//==================================================================================================== class SGraph { public: - SNode nodes[MAXNODES]; - int n; // number of nodes + enum Output { HEADS=1, DEPS=2, SETS=4, CONSTRAINTS=8, BOUBBLES=16 }; - enum Output { HEADS=1, DEPS=2, SETS=4, CONSTRAINTS=8 }; + SGraph(MGraph& mg) : mgraph(mg) { clear(); } - SGraph() : n(0) {} + SNode& operator[](const int i) { return nodes[i]; } - void clear() { n=0; } - - int add_base_snode(MNode* mn); - int clone(int ancind, NodeProp newprop); + void clear() { nodes.clear(); } + int add_base_snode(int mnodeind); + int clone(int ancind, NodeProp newprop); void update_left(int headind, int depind); void update_right(int headind, int depind); - bool visible(int left, int right); bool saturated(int node); - //-------------------------------------------------------------------- + Cat cat(int i) const { return mgraph[nodes[i].mnode].cat; } + char* form(int i) const { return mgraph[nodes[i].mnode].form; } - void read(FILE* f); - void write(FILE* f, list nodelist, unsigned int info); - - int sprint_node(char* buf, int n, unsigned int info); int print_node(FILE* f, int n, unsigned int info); - int sprint_node_debug(char* buf, const char* pref, int n); - int print_node_debug(FILE* f, const char* pref, int n); + int print_node_debug(FILE* f, const char* pref, int n, int anc); void print_arc(FILE* f, int left, int right, Role role, int dir); // 0 - left, 1 - right + //private: + + int size() {return nodes.size(); } + +private: + + MGraph& mgraph; + + vector nodes; + + int lastnodeind() { return nodes.size()-1; } + SNode& makenewnode() { nodes.push_back(SNode()); nodes.back().clear(); return nodes.back(); } + + int sprint_node(char* buf, int n, int anc, unsigned int info); + int sprint_node_debug(char* buf, const char* pref, int n, int anc); }; +//---------------------------------------------------------------------------------------------------- inline bool SGraph::visible(int left, int right) { return nodes[right].LV[left]; } +//---------------------------------------------------------------------------------------------------- + inline bool SGraph::saturated(int node) { return nodes[node].saturated(); } +//---------------------------------------------------------------------------------------------------- + #endif diff --git a/src/dgp/symbol.hh b/src/dgp/symbol.hh index 2a70bc2..3d24de5 100644 --- a/src/dgp/symbol.hh +++ b/src/dgp/symbol.hh @@ -2,9 +2,8 @@ #define _SYMBOL_HH #include -//#include #include -#include +#include #include #include #include @@ -57,7 +56,7 @@ class Symbols /// Symbol class template. /** The template argument determines the symbol space. - Each space is created with symbol "NULL" with indexed 0 already in. + Each space is created with symbol "NULL" with index 0 already in. */ template @@ -103,7 +102,12 @@ class Symbol */ Symbol(const char * s) : val(defs[s]) {}; - + + Symbol(string s) : val(defs[(char*)s]) {}; + + + bool empty() const { return val==0; } + /// Symbol to char* conversion. If symbol is invalid, NULL is returned. const char* str() const { return (val>=0 && val s=1; s; s++ ) ... s=0; while(++s) ... */ + (operator int)() const { return val; }; Symbol operator++() {val++; return *this;} diff --git a/src/dgp/thesymbols.hh b/src/dgp/thesymbols.hh index b90f997..4c85b04 100644 --- a/src/dgp/thesymbols.hh +++ b/src/dgp/thesymbols.hh @@ -8,22 +8,29 @@ #include #include -typedef Symbol<1> Cat; +using namespace std; -typedef Symbol<2> Role; -typedef list RoleList; -typedef list::iterator RoleListIter; -typedef bitset RoleSet; -typedef set Roles; -typedef Roles::iterator RolesIter; +typedef Symbol<1> Cat; +typedef bitset CatSet; -typedef Symbol<3> Constr; -typedef list ConstrList; +typedef Symbol<2> Role; +typedef list RoleList; +typedef list::iterator RoleListIter; +typedef bitset RoleSet; +typedef set Roles; +typedef Roles::iterator RolesIter; + +typedef Symbol<3> Constr; +typedef list ConstrList; typedef list::iterator ConstrListIter; -typedef Symbol<4> Rel; +typedef Symbol<4> LongRel; +typedef set LongRels; -typedef Symbol<5> Flag; -typedef bitset FlagSet; +typedef Symbol<5> Flag; +typedef bitset FlagSet; + +typedef Symbol<6> Prop; +typedef bitset PropSet; #endif diff --git a/src/gph/gph b/src/gph/gph index 3f739c8..6eab711 100755 --- a/src/gph/gph +++ b/src/gph/gph @@ -13,7 +13,7 @@ use File::HomeDir; -my $systemconfigfile='/usr/local/etc/utt/gph.conf'; +my $systemconfigfile='/etc/utt/gph.conf'; my $userconfigfile=home()."/.utt/gph.conf"; Getopt::Long::Configure('no_ignore_case_always'); diff --git a/src/tre/Makefile b/src/tre/Makefile new file mode 100644 index 0000000..2ed32d9 --- /dev/null +++ b/src/tre/Makefile @@ -0,0 +1,17 @@ +include ../../config.mak + +tre: + +.PHONY: install +install: +ifdef BIN_DIR + install -m 0755 tre $(BIN_DIR) +endif + +.PHONY: uninstall +uninstall: +ifdef BIN_DIR + rm $(BIN_DIR)/tre +endif + +clean: diff --git a/src/tre/tre b/src/tre/tre new file mode 100755 index 0000000..7f3b4c3 --- /dev/null +++ b/src/tre/tre @@ -0,0 +1,435 @@ +#!/usr/bin/ruby1.9.1 -I /usr/local/lib/utt -I $HOME/.local/lib/utt +# -*- coding: iso-8859-2 -*- + +$: << "#{ENV['HOME']}/.local/lib/utt" +$: << "/usr/local/lib/utt" + +require 'getoptlong' +require 'seg.rb' + +opts = GetoptLong.new( +[ '--help', '-h', GetoptLong::NO_ARGUMENT ], +[ '--debug', '-d', GetoptLong::NO_ARGUMENT ], +[ '--format', '-F', GetoptLong::REQUIRED_ARGUMENT ], +[ '--info', '-I', GetoptLong::REQUIRED_ARGUMENT ], +[ '--span', '-s', GetoptLong::REQUIRED_ARGUMENT ], +[ '--maxsize', GetoptLong::REQUIRED_ARGUMENT ], +[ '--forest', GetoptLong::NO_ARGUMENT ], +[ '--ground', GetoptLong::NO_ARGUMENT ], +[ '--only-trees','-t', GetoptLong::NO_ARGUMENT ]) + +$helptext= +"The program generates trees from the graph output by dgp. dgp must\n"+ +"must be run with '-i ds' option.\n\n"+ +"Command: tre [options]\n\n"+ +"Options:\n"+ +"--help -h Print help (this text) and exit.\n"+ +"--debug -d Verbose output. For developers only.\n"+ +"--format=s -F s Output format. Recognized values:\n"+ +" a root + list of arcs\n"+ +" p parenthesized notation\n"+ +" h human readable indented tree format\n"+ +" Multiple values are allowed. (default p)\n"+ +"--info=s -I s Information printed. Recognized values:\n"+ +" n node identifier\n"+ +" f surface form\n"+ +" m morphological information\n"+ +" l arc labels\n"+ +"--only-trees -t Do not copy input. Print trees only.\n" + +$DEBUG=false +$FORMAT='p' +$INFO='DEFAULT' +$ONLYTREES=false +$START=nil +$END=nil +$FOREST=false +$MAXSIZE=nil + +opts.each do |opt, arg| + case opt + when '--help' + print $helptext + exit 0 + when '--debug' + $DEBUG=true + when '--format' + $FORMAT=arg + when '--info' + $INFO=arg + when '--only-trees' + $ONLYTREES=true + when '--forest' + $FOREST=true + when '--ground' + $GROUND=true + when '--maxsize' + $MAXSIZE=arg.to_i + when '--span' + $START,$END = arg.split ',' + else + print "Unknown option #{opt}. Ignored.\n" + end +end + +if $INFO=='DEFAULT' + case $FORMAT + when 'p','a' + $INFO='nl' + when 'h' + $INFO='fmnl' + end +end + +$dgpsep=';' + +def tre(input) + $gphid=[] + $form=[] + $lem=[] + $ord1=[] + $count=0 + nodes=[] + prevpos=-1 + tokennumber=0 + for line in input + seg=Seg.new(line) + print line unless $ONLYTREES || seg.field(3) == 'EOS' + + if dgp=seg['dgp'] + if nodes==[] && seg[3]!='BOS' + print "A sentence must start with BOS segment. Aborting.\n" + return + end + + id=dgp[/^\d+/].to_i + + if gph=seg['gph'] + $gphid[id]=gph[/^\d+/].to_i + else + print "No gph field. Aborting.\n" + return + end + + $form[$gphid[id]] = seg[4] + $lem[$gphid[id]] = seg['lem'] + $ord1[$gphid[id]] = if prevpos==seg[1].to_i then tokennumber + else prevpos=seg[1].to_i; tokennumber+=1 end + + nodes[id] = [seg[1].to_i,seg[2].to_i,dgp] + + if seg[3]=='EOS' + + $pref = "#{seg[1]} #{seg[2]} SYN *" + + parsegraph(nodes) + + set_ord #(0...(nodes.length)).each{|i| set_distance_from_i i } + + printgraph if $DEBUG + + if $GROUND + printground + else + thetrees = $FOREST ? genforest : gentrees + + output_trees thetrees + + print line unless $ONLYTREES + + $gphid=[] # POWTÓRZENIE + $form=[] + $lem=[] + $ord1=[] + $count=0 + nodes=[] + prevpos=-1 + tokennumber=0 + end + end + end + end +end + + +def output_trees trees + for t in trees + $count += 1 + t1=ground(t) + + span = $FOREST ? " span:" + (ground_tree_min(t1).to_s + ","+ground_tree_max(t1).to_s)+";" : "" + case $FORMAT + when /a/ + print "#{$pref} tre:#{$count}#{span} #{arcsinfo(t1[0],t1[1])}" +# print arcsinfo(t1[0],t1[1]) + print "\n" + when /p/ + print "#{$pref}#{span} tre:#{$count} par:" + printpar(t1[0],t1[1]) + print "\n" + when /h/ + print "#\n# tree #{$count}\n# ------\n" + printtree(t1[0],t1[1],0) + end + end +end + + +def nodeinfo(id) + info="" + if $INFO =~ /o/ + info += $ord1[id].to_s + info += '.' if $INFO =~ /[nfm]/ + end + if $INFO =~ /n/ + info += id.to_s + info += '.' if $INFO =~ /[fm]/ + end + if $INFO =~ /f/ + info += $form[id] + info += ';' if $INFO =~ /m/ + end + if $INFO =~ /m/ + info += $lem[id] + end + info +end + + +def arcsinfo(root,arcs) + "head:#{nodeinfo(root)} links:" + arcs.map{|a| "(#{($INFO =~ /l/) ? a[2]+":" : ""}#{nodeinfo(a[0])}-#{nodeinfo(a[1])})"}.join("") +# for a in arcs +# print ';' +# print "#{a[2]}:" if $INFO =~ /l/ +# print nodeinfo(a[0])+'-'+nodeinfo(a[1]) +# end +end + +def printtree(root,arcs,o) + if o==0 + print "# %-16s" % "root: " + end + print nodeinfo(root),"\n" + for arc in arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } + print '# '," "*(o+1) + print "%-16s" % (arc[2]+": ") + printtree(arc[1],arcs,o+1) + end +end + +def printpar(root,arcs) + print nodeinfo(root) + deps = arcs.select{ |a| a[0]==root }.sort{|a,b| a[1]<=>b[1] } + unless deps == [] + print '(' + cont=false + for arc in deps + if cont then print ',' else cont=true end + print arc[2],':' if $INFO =~ /l/ + printpar(arc[1],arcs) + end + print ')' + end +end + + +def ground_tree_min t + ([t[0]]+t[1].map{|e| [e[0],e[1]]}).flatten.min +end + +def ground_tree_max t + ([t[0]]+t[1].map{|e| [e[0],e[1]]}).flatten.max +end + + + +def parsegraph(nodes) + + $n =nodes.length + $sat =[]; $vis =[]; $succ=[]; $lhs =[]; $arcs=[]; $pos=[]; $len=[]; $ord=[]; $distance={} + + for dgp in nodes + + parts = dgp[2].split($dgpsep,7) + + if parts[3]==nil || parts[4]==nil || parts[5]==nil + $stderr.print "ERR: tre requires dgp be called with '--info s' option. Aborting.\n" + exit + end + + i = parts[0].to_i + $pos[i] = dgp[0].to_i + $len[i] = dgp[1].to_i + $sat << i if parts[1]=="s" + + $arcs |= parts[2].split(',').map{ |a| case a + when /\-\-(\w+)-(\d+)\((\d+)~(\d+)\)/ + [i, $2.to_i, $1, $3.to_i, $4.to_i] + when /\+\+(\w+)-(\d+)\((\d+)~(\d+)\)/ + [$2.to_i, i, $1, $3.to_i, $4.to_i] + end } + $succ |= parts[3][1..-2].split(',').map{|x| [x.to_i,i]} + $vis |= parts[4][1..-2].split(',').map{|x| [x.to_i,i]} + $lhs |= parts[5][1..-2].split(',').map{|x| [x.to_i,i]} + [[i,i]] + + end + +end + + +def ground(t) + [ $gphid[t[0]] , t[1].map{|a| [$gphid[a[0]],$gphid[a[1]],a[2]]} ] +end + + +#NOWE-START + +def successors i + $succ.select{|e| e[0]==i}.map{|e| e[1]} +end + +def predecessors i + $succ.select{|e| e[1]==i}.map{|e| e[0]} +end + +def start_nodes + $succ.map{|e| e[1]}.map{|e| predecessors(e)}.uniq.map{|e| e[0]} +end + +def end_nodes + $succ.map{|e| e[0]}.map{|e| successors(e)}.uniq.map{|e| e[0]} +end + +def set_ord + positions = $pos.uniq.sort + (0...$n).each{|i| $ord[i] = positions.index($pos[i]) } +end + + +def set_distance_from_i i + set_distance_from_i_to_jth_successors_to_v i, i, 1 +end + +def set_distance_from_i_to_jth_successors_to_v i, j , v + succ = successors(j) + for j1 in succ + $distance[[i,j1]] = v + set_distance_from_i_to_jth_successors_to_v i, j1, v+1 + end +end + +#NOWE-END + + +def gentrees + bos=0; eos=$n-1; + gentrees2 bos, eos +end + + +def genforest + forest=[] + for bos in start_nodes + for eos in end_nodes # tu są też wierzchołki poprzedzające!!! + next if $ord[bos] > $ord[eos] or ($MAXSIZE != nil and $ord[eos] - $ord[bos] > $MAXSIZE+1) + forest += gentrees2(bos,eos) + end + end + forest +end + +def gentrees2 bos, eos + $thetrees=[]; + roots = (1...eos).select{|i| $vis.include? [i,eos]}.select{|i| $vis.include? [bos,i]} + + if $DEBUG then print "ROOTS: #{roots.inspect}\n" end + for root in roots + gentrees3 bos, eos, root + end + $thetrees +end + +def gentrees3 bos, eos, root + $theroot=root + $thebos=bos + $theeos=eos + for r in buildR(root , eos, []) + (rmin,rmax,rtree) = r + buildR(bos, rmin, rtree) + end +end + +def buildR(min, max, tree) + if $DEBUG then print "buildR--#{min}--#{max}--#{tree.inspect}\n" end + trees=[] + for a in $arcs.select{|a| a[0]==max && $vis.include?([min,a[1]]) } + if $DEBUG then print "ARC: #{a.inspect}\n" end + for r in buildR(a[4],a[3],tree+[a]) #!!! buildR(a[1],a[3],tree+[a]) + (rmin,rmax,rarcs) = r + for l in buildR(min,rmin,rarcs) + (lmin,lmax,larcs) = l + trees << [lmin,rmax,larcs] + end + end + end + for i in (0...$n).select{|i| $succ.include?([i,max])}.select{|i| $lhs.include?([min,i])} + for l in buildL(min,i,tree) + (lmin,lmax,larcs) = l + trees << [lmin,lmax,larcs] + end + end + trees +end + + +def buildL(min,max,tree) + if $DEBUG then print "buildL--#{min}--#{max}--#{tree.inspect}\n" end + if $pos[min]==$pos[max] + if min==$thebos && max==$thebos + $thetrees.push [$theroot,tree] + if $DEBUG then print "adding tree: #{tree.inspect}\n" end + end + return [[max,max,tree]] + end + trees=[] + for arc in $arcs.select{|a| a[1]==max && $lhs.include?([min,a[0]]) } + if $DEBUG then print "ARC: #{arc.inspect}\n" end + for r in buildR(arc[3],arc[4],tree+[arc]) ### buildR(arc[3],max,tree+[arc]) + (rmin,rmax,rarcs) = r + for l in buildL(min,rmin,rarcs) + (lmin,lmax,larcs) = l + trees << [lmin,lmax,larcs] + end + end + end + trees +end + + +def printgraph() + print "N: #{$n}\n" + print "SAT: #{set_to_s($sat)}\n" + print "SUCC: #{rel_to_s($succ)}\n" + print "VIS: #{rel_to_s($vis)}\n" + print "LHS: #{rel_to_s($lhs)}\n" + print "ARCS: #{arcs_to_s($arcs)}\n" +end + + +def printground + for i in 1...($form.length-1) + print "#{$ord1[i]} #{$form[i]} #{$lem[i]} " + print $arcs.select{|a| $ord1[$gphid[a[1]]] == $ord1[i]}.map{|a| "#{a[2]}:#{$ord1[$gphid[a[0]]]}"}.sort.uniq.join(' ') + print "\n" + end +end + + +def set_to_s(s) "{#{s.join(',')}}" end +def rel_to_s(r) "{#{r.map{|p| "(#{p[0]},#{p[1]})"}.join(',')}}" end +def arc_to_s(q) "-#{q[0]}-#{q[2]}-#{q[1]}/#{q[3]}" end +def arcs_to_s(a) "{#{a.map{|q| arc_to_s(q)}.join(',')}}" end + +###################################################################### + +tre($stdin)