diff --git a/app.py b/app.py index ac77404..28fca91 100644 --- a/app.py +++ b/app.py @@ -535,7 +535,7 @@ def upload_file(request): elif combineOperator == 'True' or combineOperator == 'true': combineOperator = 'AND' try: - if title_query is not None or content_query is not None : + if title_query != '' and content_query != '': title_query = row.get("title") content_query = row.get("content") @@ -691,6 +691,12 @@ def about(): data, unique_types, selected_type = indicators(request) return render_template('about.html', data=data, unique_types=unique_types, selected_type=selected_type, indicator_metadata=INDICATOR_METADATA) +@app.route('/dashboard') +@clean_inputs +def dashboard(): + return render_template('dashboard.html') + + @app.route('/domain_labels') @clean_inputs def parse_gate_domain_labels(request=None, text=None): @@ -861,7 +867,6 @@ def fetch_gdelt_results(title_query, content_query, combineOperator, language, c print(f"Error during request: {e}") return None -# Trunk content recievind def fetch_content_results(title_query, content_query, combineOperator, language, country, engines=['google', 'google_news', 'bing', 'bing_news', 'duckduckgo', 'yahoo', 'yandex', 'gdelt', 'copyscape']): title_query = truncate_text(title_query) @@ -1065,6 +1070,11 @@ def customize_params_by_platform(title_query, content_query, combineOperator, la country_yahoo = 'us' if country_language not in COUNTRY_LANGUAGE_DUCKDUCKGO: country_language = 'wt-wt' + if len(language_country) > 5: # trim down to 5 characters by lopping off the front 3, prevents errors like zh-zh-cn + language_country = language_country[:-3] + if len(country_language) > 5: + country_language = country_language[3:] + paramsList = { "google": { @@ -1170,7 +1180,7 @@ def convert_results_to_csv(results): data['domain'], str(data['domain_count']), data['title'], - data['snippet'], + data['snippet'] if data['snippet'] is not None else '', data['url'], str(data['link_count']), ', '.join(data['engines']), diff --git 
a/templates/dashboard.html b/templates/dashboard.html new file mode 100644 index 0000000..2e25d28 --- /dev/null +++ b/templates/dashboard.html @@ -0,0 +1,320 @@ + + + + + + + Information Laundromat - Dashboard + + + + + +
+
+
+
+
+

The Information Laundromat

+
+
+
+
+

+ Search +

+
+
+
+
+

+ About +

+
+
+
+
+

+ Indicators +

+
+
+ +
+
+
+
+
+
+

About the Dashboard

+

This dashboard visualizes connections between state-sponsored content and its dissemination. Users can explore top matches by source and search engine, gaining a clear view of how disinformation spreads across the internet. + Users can adjust search parameters such as publication dates, the number of matches, and similarity scores, enabling both detailed explorations and broader overviews of laundering operations. + Each match is meticulously documented with titles, sources, and match scores, providing users with the evidence needed to understand and expose information laundering tactics.

+ + + +

Important Warnings for Interpreting Results

+ +

1. Context Matters

+

Limitations of Automated Analysis: While The Information Laundromat uses sophisticated algorithms to identify content similarities and connections, it cannot fully account for the context or intent behind the dissemination of information. Users should consider the broader context in which content is shared and be cautious in drawing conclusions based solely on automated matches.

+ +

2. False Positives and False Negatives

+

Potential for Errors: The tool may produce false positives, where content appears to be laundered but is not, or false negatives, where laundered content is missed. Although we apply stringent filtering to minimize these occurrences, users should critically assess the results and, where possible, corroborate findings with additional sources.

+ +

3. Differentiating Intent

+

No Assumption of Malice: The presence of content from state-sponsored media on a given website does not necessarily imply coordination or intent to deceive. Some websites may unknowingly republish content or do so for reasons unrelated to disinformation. Users should avoid assuming malicious intent without further evidence.

+ +

4. Attribution Challenges

+

Difficulty in Proving Coordination: Identifying that a website has republished state-sponsored content is not the same as proving that there is a deliberate campaign to launder information. Attribution of intent or coordination requires more in-depth investigation and should not be inferred solely from the data provided by The Information Laundromat.

+ +

5. Evolving Tactics

+

Adaptability of Adversaries: State-sponsored actors and other entities involved in information laundering are continuously evolving their tactics. The information and connections identified by The Information Laundromat reflect a snapshot in time and may not capture the full scope of ongoing disinformation campaigns.

+ +

6. Regional and Cultural Sensitivities

+

Varying Interpretations: The significance and impact of laundered content may vary depending on regional and cultural contexts. Users should be mindful of these differences when interpreting results, especially when analyzing content that crosses linguistic or national boundaries.

+ +

7. Limitations of Search Engines

+

Search Engine Variability: The tool relies on results from various search engines, which may differ in their indexing, ranking, and censorship practices. This variability can affect the results and should be taken into account when interpreting the data.

+ +

8. Use of the Tool as a Starting Point

+

Not a Conclusive Source: The Information Laundromat is intended as a research aid and not as a definitive source of truth. It is best used as a starting point for further investigation rather than a final determination of the origins and spread of content.

+ +
+
+
+ + + + + + + + + + + \ No newline at end of file