master.conf
include "application"
crawler {
  modeler {
    // This section is for the modeling phase
    // Please store your CSV models in src/main/resources/targets
    // and your golden models in src/main/resources/golden
    // "http://your-website.com" {
    //   static.file = <filename.csv>  CSV model with which the crawling will be performed
    //                                 (do not use in combination with dynamic.pages)
    //   dynamic.pages = <integer n>   builds the model from scratch, crawling at most n URLs
    //                                 (do not use in combination with static.file;
    //                                 the generated model will be used if neither static.file
    //                                 nor dynamic.pages is specified;
    //                                 if a model of the website is already present and dynamic.pages
    //                                 is used, a new version of the model will be generated)
    //   wait = <integer>              fixed pause between HTTP requests, in milliseconds
    //                                 (default is 2000)
    //   randompause = <integer i>     random pause in the range [0,i] between HTTP requests, in milliseconds
    //                                 (default is 1000)
    //   javascript = <boolean>        whether or not JavaScript will be loaded
    //                                 (default is false)
    //   maxfailures = <integer>       max attempts to fetch a URL
    //                                 (default is 1)
    //   crawl = <boolean>             whether or not this website will be crawled with the model
    //                                 (default is false)
    //   golden = <filename.csv>       golden model used to evaluate the generated model
    //                                 (to be used only in combination with dynamic.pages)
    //   savepages = <boolean>         if true, the pages classified during dynamic modeling will be saved
    //                                 along with a CSV of (url, file_location, pageclass) triples
    //                                 (default is false)
    // }
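    // A minimal sketch of a dynamic-modeling entry (the URL, filenames, and values below are
    // illustrative assumptions, not project defaults):
    // "http://example.com" {
    //   dynamic.pages = 500
    //   wait = 1500
    //   javascript = true
    //   golden = "example-golden.csv"
    //   crawl = true
    //   savepages = true
    // }
    // Or, to crawl with a CSV model already stored in src/main/resources/targets:
    // "http://example.com" {
    //   static.file = "example-model.csv"
    //   crawl = true
    // }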
  }
  crawling {
    // This section is for the actual crawling phase
    // Every parameter refers to a single website
    fetchers = 1           // number of fetchers to use
    pages = 1000000        // max number of pages to be fetched
    frontierheap = 10000   // max number of urls allowed in the in-memory part of the frontier
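    // Illustrative alternative for a small test crawl (values are assumptions, not recommendations):
    // fetchers = 1
    // pages = 10000
    // frontierheap = 1000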
  }
}
nodes {
  // This section specifies the available hosts that will perform the crawl
  // Name each host repositoryN, with N = 0,1,2,...,n
  // The system of repository0 must be named CrawlSystem, the following ones RepositorySystemN
  repository0 {
    host = "127.0.0.1"
    port = 2552
    system = "CrawlSystem"
  }
  // repository1 {
  //   host = "192.168.1.132"
  //   port = 2552
  //   system = "RepositorySystem1"
  // }
}
akka {
  // Akka configuration for this node (repository0)
  // Every repositoryN must define its own repositoryN.conf file and override this section
  // with its own address
  actor {
    provider = remote
  }
  remote {
    log-remote-lifecycle-events = off
    artery {
      enabled = on
      canonical.hostname = "127.0.0.1"
      canonical.port = 2552
    }
  }
}
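
// A minimal sketch (hypothetical values) of the repositoryN.conf that a remote node, e.g.
// repository1, might use to override the akka section above; the include name and exact
// file layering are assumptions:
//
// include "master"
// akka {
//   remote.artery {
//     canonical.hostname = "192.168.1.132"
//     canonical.port = 2552
//   }
// }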