#config.ini.example

#main config file. See core/ParZu_parameters.pl for some technical parameters.

#Temporary directory: some temporary files are created during parsing.
#Can be any existing directory, or 'local' (in which case the temporary files will be created in the 'tmp' folder in the installation directory).
#Make sure that all users who run the parser have write permission for this directory.

tempdir = local


#######################################

#Use unique names for temporary files. 
#0 to disable, 1 to enable
#Use 1 in multi-user environments
#Default: 0

uniquetmp = 1

#######################################

#Delete temporary files after parsing. More resource friendly, especially if uniquetmp (see above) is enabled
#Set to 0 for debugging purposes.
#Default: 1

deltemp = 1

#######################################

#Tagger command
#You can use any POS tagger that uses the STTS tagset and the expected input/output format. For more info, check the TECHNICAL FAQ section in the Readme file
#treetagger-wrapper.py is written specifically for TreeTagger, and you need to set the correct path to the TreeTagger binary in treetagger-wrapper.py if you use it.

taggercmd = /data/clevertagger/clevertagger

#######################################

#Gertwol commands

gertwolcmd = /opt/bin/uis-gertwol
gertwolscorecmd = /opt/bin/gertwolscore

#######################################

# model for SMOR morphological analysis.
# will be called with fst-infl (requires sfst)
# pre-compiled Zmorge models can be found on https://pub.cl.uzh.ch/users/sennrich/zmorge/
smor_model = /home/rico/parzu/zmorge-20140224-smor_newlemma.ca

#######################################

#Morphology: tool used for morphological analysis. Set this to none if neither Gertwol nor SMOR is installed/available on the system
#gertwol: use gertwol for morphological analysis
#smor: use SMOR (or compatible tool: Zmorge or morphisto) for morphological analysis
#none: no morphological analysis
#keep: use morphological analysis from input. Use this if you do your own preprocessing
#Default: smor

morphology = smor


#######################################

#Output format: supported so far: 
#conll: CoNLL 2007 data format (defined at http://nextens.uvt.nl/depparse-wiki/DataFormat )
    #POSITION    WORDFORM    LEMMA    CPOS    POS    MORPHOLOGY    HEADPOSITION    DEPREL    _    _
#graphical: generate one SVG image per sentence in your current directory, using DepSVG.
#moses: format used by moses SMT system
    #WORDFORM|POS|DEPREL|HEAD
#preprocessed: return preprocessed input (unparsed, but parser-ready)
#prolog: prolog-readable format
    #word(Sentence, Position, WordForm, Lemma, POS, DepRel, HeadPosition, Morphology).
#raw: parser output without postprocessing (trees are projective; access to all input information (lemmas/morph. analysis etc.))
#Default: conll

outputformat = conll


#######################################

#Input format: supported so far: 
#plain: plain text
#preprocessed: parser-ready input (no further preprocessing required)
#tokenized: one token per line; empty lines mark sentence boundaries
#tagged: POS-tagged text. Format is 'token \t tag \n'
#Default: plain

inputformat = plain

#######################################

#Number of parallel processes of the main parsing step
# 0 uses cpu_count
# 1 disables multiprocessing
# Any x > 1 makes the program spawn x parser processes
# Any x < 0 makes the program spawn (cpu_count-x) parser processes
# Default: 1

#(Requires python 2.6 or newer; older versions automatically use only one process)

processes = 4

#######################################

#Use nbest tagging output
#This currently only works with a CRF++ tagger
#Set to '0' to disable, or any positive integer to enable
nbestmode = 0

#######################################

#probability cutoff in nbest mode
#Discard POS analyses before tagging if their probability, divided by the probability of the best analysis, is below this threshold.
#Allows for high values of nbestmode without sacrificing too much time
nbest_cutoff = 0.05