diff --git a/adscrawler/app_stores/apple.py b/adscrawler/app_stores/apple.py index 76fe77e9..b2acf04f 100644 --- a/adscrawler/app_stores/apple.py +++ b/adscrawler/app_stores/apple.py @@ -183,25 +183,24 @@ def scrape_store_html(store_id: str, country: str) -> dict: headers = {"User-Agent": "Mozilla/5.0"} response = requests.get(url, headers=headers) - html = response.text - if response.status_code != 200: logger.error(f"Failed to retrieve the page: {response.status_code}") return {} soup = BeautifulSoup(response.text, "html.parser") + in_app_purchase_element = soup.find( - "li", class_="inline-list__item--bulleted", string="Offers In-App Purchases" + "p", class_=["attributes"], string=re.compile(r"Purchases", re.I) ) - has_in_app_purchases = in_app_purchase_element is not None - try: - privacy_details = get_privacy_details(html, country, store_id) - except Exception as e: - logger.error(f"Failed to get privacy details for {store_id=} {country=} {e}") - privacy_details = None + has_in_app_purchases = in_app_purchase_element is not None - has_third_party_advertising = "THIRD_PARTY_ADVERTISING" in str(privacy_details) + purpose_section = soup.find( + "section", class_=lambda classes: classes and "purpose-section" in classes + ) + has_third_party_advertising = ( + "third-party advertising" in purpose_section.get_text(strip=True).lower() + ) urls = get_urls_from_html(soup) @@ -229,70 +228,6 @@ def get_urls_from_html(soup: BeautifulSoup) -> dict: return urls -def get_privacy_details(html: str, country: str, store_id: str) -> dict: - """ - Get privacy details for an iOS app from the App Store. 
- - Args: - html: HTML of the App Store page - country: Country code (default: 'US') - store_id: App Store ID of the app - - Returns: - True if the app has third-party advertising, False otherwise - """ - - # Extract the token using regex - reg_exp = r"token%22%3A%22([^%]+)%22%7D" - match = re.search(reg_exp, html) - if not match: - raise ValueError("Could not extract token from App Store page") - - token = match.group(1) - - # Make request to the API for privacy details - api_url = f"https://amp-api-edge.apps.apple.com/v1/catalog/{country}/apps/{store_id}?platform=web&fields=privacyDetails" - api_headers = { - "Origin": "https://apps.apple.com", - "Authorization": f"Bearer {token}", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", - } - - response = requests.get(api_url, headers=api_headers) - response.raise_for_status() - - data = response.json() - - if not data.get("data") or len(data["data"]) == 0: - raise ValueError("App not found (404)") - privacy_details: dict = data["data"][0]["attributes"]["privacyDetails"] - return privacy_details - - -def find_privacy_policy_id(soup: BeautifulSoup) -> str | None: - privacy_learn_more = soup.find("p", class_="app-privacy__learn-more") - - if privacy_learn_more: - # Get the anchor tag inside the paragraph - link = privacy_learn_more.find("a") - - if link: - # Extract the full URL - url = link.get("href") - print(f"URL: {url}") - - # Extract the ID from the URL - # The URL format is https://apps.apple.com/story/id1538632801 - if "id" in url: - # Find the position where "id" starts and extract everything after it - id_position = url.find("id") - if id_position != -1: - id_value: str = url[id_position + 2 :] # +2 to skip the "id" prefix - print(f"ID: {id_value}") # This will print: 1538632801 - return id_value - return None - - def get_developer_url(result: dict, urls: dict) -> str: """ Decide if we should crawl the store html for the 
developer url. @@ -328,12 +263,14 @@ def get_developer_url(result: dict, urls: dict) -> str: return final_url -def scrape_app_ios(store_id: str, country: str, language: str) -> dict: +def scrape_app_ios( + store_id: str, country: str, language: str, scrape_html: bool = False +) -> dict: """Scrape iOS app details from the App Store. yt_us = scrape_app_ios("544007664", "us", language="en") yt_de = scrape_app_ios("544007664", "de", language="en") - # SAME FOR ALL COUNTRIES + ## SAME FOR ALL COUNTRIES yt_de['sellerName'] == yt_us['sellerName'] yt_us['currentVersionReleaseDate'] == yt_de['currentVersionReleaseDate'] @@ -344,7 +281,7 @@ def scrape_app_ios(store_id: str, country: str, language: str) -> dict: yt_de['user_ratings'] != yt_us['user_ratings'] yt_de['description'] != yt_us['description'] - # These very by country but are also the same as each other? + ### These very by country but are also the same as each other? yt_de['userRatingCount'] == yt_de['userRatingCountForCurrentVersion'] @@ -355,7 +292,9 @@ def scrape_app_ios(store_id: str, country: str, language: str) -> dict: result_dict: dict = scraper.get_app_details( store_id, country=country, add_ratings=True, timeout=10, lang=language ) - logger.info(f"store=2 {country=} {language=} {store_id=} ios store scraped") + if scrape_html: + result_dict = scrape_itunes_additional_html(result_dict, store_id, country) + logger.debug(f"store=2 {country=} {language=} {store_id=} ios store scraped") return result_dict @@ -366,9 +305,9 @@ def scrape_itunes_additional_html(result: dict, store_id: str, country: str) -> result["in_app_purchases"] = html_res["in_app_purchases"] result["ad_supported"] = html_res["ad_supported"] result["sellerUrl"] = get_developer_url(result, html_res["urls"]) + result["additional_html_scraped_at"] = datetime.datetime.now(tz=datetime.UTC) except Exception as e: logger.warning(f"Failed to get developer url for {store_id=} {country=} {e}") - result["additional_html_scraped_at"] = 
datetime.datetime.now(tz=datetime.UTC) return result @@ -477,6 +416,16 @@ def clean_ios_app_df(df: pd.DataFrame) -> pd.DataFrame: df.loc[ df["store_language_code"].str.startswith("zh-"), "store_language_code" ] = "zh" + # Fix .0 int to string issues + # Mixing nulls in and ints cause .0 to be added to the end of the string + problem_rows = df["developer_id"].str.contains(".0") + if problem_rows.any(): + logger.warning( + f'Found {problem_rows.sum()} developer_id with ".0" suffix, fixing' + ) + df.loc[problem_rows, "developer_id"] = ( + df.loc[problem_rows, "developer_id"].str.split(".").str[0] + ) return df diff --git a/adscrawler/app_stores/google.py b/adscrawler/app_stores/google.py index 2ad516b0..f6f73bb6 100644 --- a/adscrawler/app_stores/google.py +++ b/adscrawler/app_stores/google.py @@ -16,14 +16,14 @@ def scrape_app_gp(store_id: str, country: str, language: str = "en") -> dict: yt_us = scrape_app_gp("com.google.android.youtube", "us", language="en") yt_de = scrape_app_gp("com.google.android.youtube", "mx", language="en") - # SAME FOR ALL COUNTRIES + ## SAME FOR ALL COUNTRIES yt_us["ratings"] == yt_de["ratings"] yt_us["realInstalls"] == yt_de["realInstalls"] yt_us["updated"] == yt_de["updated"] - # MOSTLY SAME FOR ALL COUNTRIES - # Almost always lower for smaller countries - # looks more like delays and incomplete (0s) + ## MOSTLY SAME FOR ALL COUNTRIES + ## Almost always lower for smaller countries + ## looks more like delays and incomplete (0s) yt_us["histogram"] == yt_de["histogram"] @@ -31,8 +31,6 @@ def scrape_app_gp(store_id: str, country: str, language: str = "en") -> dict: yt_us["reviews"] != yt_de["reviews"] yt_us["score"] != yt_de["score"] - - ## UNIQUE PER LANGUAGE yt_us["description"] == yt_de["description"] yt_us["description"] == yt_de_en["description"] @@ -43,7 +41,7 @@ def scrape_app_gp(store_id: str, country: str, language: str = "en") -> dict: country=country, timeout=10, ) - logger.info(f"store=1 {country=} {language=} {store_id=} play 
store scraped") + logger.debug(f"store=1 {country=} {language=} {store_id=} play store scraped") return result_dict @@ -161,14 +159,14 @@ def get_js_data(filepath: str, is_json: bool = True) -> list[dict] | list: def scrape_google_ranks(country: str) -> list[dict]: - logger.info("Scrape Google ranks start") + logger.info(f"Scrape Google ranks {country=} start") filepath = f"/tmp/googleplay_json_{country}.txt" try: call_js_to_update_file(filepath, country) except Exception as error: - logger.exception(f"JS pull failed with {error=}") + logger.exception(f"JS pull failed with {country=} {error=}") ranked_dicts = get_js_data(filepath) - logger.info(f"Scrape Google ranks finished: {len(ranked_dicts)}") + logger.info(f"Scrape Google ranks {country=} finished: {len(ranked_dicts)}") return ranked_dicts diff --git a/adscrawler/app_stores/process_from_s3.py b/adscrawler/app_stores/process_from_s3.py index 0e4e42bc..4c74d542 100644 --- a/adscrawler/app_stores/process_from_s3.py +++ b/adscrawler/app_stores/process_from_s3.py @@ -15,7 +15,11 @@ get_db_connection, ) from adscrawler.dbcon.queries import ( + clean_app_ranks_weekly_table, delete_and_insert, + get_latest_app_country_history, + get_retention_benchmarks, + query_apps_to_process_global_metrics, query_categories, query_collections, query_countries, @@ -56,9 +60,17 @@ "store_app", ] -COUNTRY_HISTORY_COLS = COUNTRY_HISTORY_KEYS + METRIC_COLS +COUNTRY_HISTORY_COLS = ( + COUNTRY_HISTORY_KEYS + + [x for x in METRIC_COLS if x != "installs"] + + ["installs_est"] +) -GLOBAL_HISTORY_COLS = GLOBAL_HISTORY_KEYS + METRIC_COLS + ["store_last_updated"] +GLOBAL_HISTORY_COLS = ( + GLOBAL_HISTORY_KEYS + + METRIC_COLS + + ["store_last_updated", "tier1_pct", "tier2_pct", "tier3_pct"] +) def raw_keywords_to_s3( @@ -157,18 +169,6 @@ def get_s3_rank_parquet_paths( return all_parquet_paths -def get_s3_app_details_parquet_paths( - snapshot_date: pd.DatetimeIndex, - store: int, -) -> list[str]: - bucket = CONFIG["s3"]["bucket"] - 
all_parquet_paths = [] - ddt_str = snapshot_date.strftime("%Y-%m-%d") - prefix = f"raw-data/app_details/store={store}/crawled_date={ddt_str}/country=" - all_parquet_paths += get_parquet_paths_by_prefix(bucket, prefix) - return all_parquet_paths - - def get_s3_agg_app_snapshots_parquet_paths( bucket: str, start_date: pd.DatetimeIndex, @@ -229,17 +229,18 @@ def check_for_duplicates(df: pd.DataFrame, key_columns: list[str]) -> None: def make_s3_app_country_metrics_history( store: int, snapshot_date: pd.DatetimeIndex ) -> None: - app_detail_parquets = get_s3_app_details_parquet_paths( - snapshot_date=snapshot_date, - store=store, - ) + s3_config_key = "s3" + bucket = CONFIG[s3_config_key]["bucket"] snapshot_date_str = snapshot_date.strftime("%Y-%m-%d") + prefix = ( + f"raw-data/app_details/store={store}/crawled_date={snapshot_date_str}/country=" + ) + app_detail_parquets = get_parquet_paths_by_prefix(bucket, prefix) if len(app_detail_parquets) == 0: logger.error( f"No app detail parquet files found for store={store} snapshot_date={snapshot_date_str}" ) return - s3_config_key = "s3" query = app_details_country_history_query( store=store, app_detail_parquets=app_detail_parquets, @@ -250,12 +251,46 @@ def make_s3_app_country_metrics_history( duckdb_con.close() +def estimate_ios_installs(df): + # For iOS we don't have installs, but we can estimate them using the review count and a conversion rate + # This is a very rough estimate and should be replaced with actual install data if possible + conversion_rate = 0.02 + df["installs_est"] = (df["rating_count"] / conversion_rate).fillna(0).astype(int) + return df + + def prep_app_metrics_history( - df: pd.DataFrame, store: int, database_connection: PostgresCon + df: pd.DataFrame, + store: int, + database_connection: PostgresCon, + snapshot_date: datetime.date, ) -> pd.DataFrame: + store_id_map = query_store_id_map_cached( + store=store, database_connection=database_connection + ) + df = pd.merge( + df, + 
store_id_map[["store_id", "id"]].rename(columns={"id": "store_app"}), + on="store_id", + how="left", + validate="m:1", + ) + if df["store_app"].isna().any(): + logger.warning(f"Found new store ids: {len(df[df['store_app'].isna()])}") + raise ValueError("New store ids found in S3 app history data") + country_map = query_countries(database_connection) + df = pd.merge( + df, + country_map[["id", "alpha2", "tier"]].rename( + columns={"id": "country_id", "alpha2": "country"} + ), + on="country", + how="left", + validate="m:1", + ) if store == 1: df["store_last_updated"] = np.where( - (df["store_last_updated"] < 0) | (df["store_last_updated"].isna()), + (df["store_last_updated"].isna() | df["store_last_updated"] < 0), None, df["store_last_updated"], ) @@ -263,27 +298,72 @@ def prep_app_metrics_history( df.loc[df["store_last_updated"].notna(), "store_last_updated"], unit="s", ) + google_app_country_history = get_latest_app_country_history( + database_connection, + snapshot_date=snapshot_date, + days_back=180, + chunk_size=5000, + store_app_ids=df["store_app"].unique(), + ) + # If data is being rerun, prefer 'newer' data from S3 + df["crawled_date"] = df["crawled_date"] + pd.Timedelta(seconds=1) + cdf = pd.concat([df, google_app_country_history], axis=0) + cdf = cdf.sort_values(by=["crawled_date"], ascending=True).drop_duplicates( + subset=["store_app", "country_id"], keep="last" + ) + cdf["review_count"] = pd.to_numeric( + cdf["review_count"], errors="coerce" + ).fillna(0) + cdf["installs"] = pd.to_numeric(cdf["installs"], errors="coerce").fillna(0) + # 1. Find the maximum review_count for each app + cdf["max_reviews"] = cdf.groupby("store_app")["review_count"].transform("max") + cdf["max_installs"] = cdf.groupby("store_app")["installs"].transform("max") + # 2. 
Flag as global if it's within a 1% threshold of the max + cdf["is_global_fallback"] = ( + cdf["review_count"] >= cdf["max_reviews"] * 0.99 + ) & (cdf["max_reviews"] > 200) + cdf["true_global_count"] = np.where( + cdf["is_global_fallback"], cdf["max_reviews"], None + ) + cdf["true_global_count"] = cdf.groupby("store_app")[ + "true_global_count" + ].transform("max") + # Remove review_count where it's just the global_fallback + cdf["review_count"] = np.where( + cdf["is_global_fallback"], 0, cdf["review_count"] + ) + # Calculate local share only for unique, non-fallback rows + # Use .replace(0, np.nan) to handle divide-by-zero safely + cdf["pct_of_global"] = ( + cdf["review_count"] / cdf["true_global_count"].replace(0, np.nan) + ).fillna(0) + cdf["installs_est"] = ( + (cdf["installs"] * cdf["pct_of_global"]).round().astype(int) + ) + df = pd.merge( + df, + cdf[["store_app", "country_id", "review_count", "installs_est"]], + on=["store_app", "country_id"], + how="left", + suffixes=("", "_y"), + validate="1:1", + ) + df = df.drop(columns=["review_count"]).rename( + columns={"review_count_y": "review_count"} + ) if store == 2: - ratings_str = df["user_ratings"].str.extractall(r"(\d+)").unstack() - ratings_str = ratings_str.reindex(df.index, fill_value=0) + ratings_str = ( + df["user_ratings"] + .str.extractall(r"(\d+)") + .unstack() + .astype("Int64") + .reindex(df.index, fill_value=0) + ) df[STAR_COLS] = ratings_str.iloc[:, 1::2].astype(int).to_numpy() df["store_last_updated"] = pd.to_datetime( df["store_last_updated"], format="ISO8601", utc=True ) - store_id_map = query_store_id_map_cached( - store=store, database_connection=database_connection - ) - country_map = query_countries(database_connection) - df["country_id"] = df["country"].map( - country_map.set_index("alpha2")["id"].to_dict() - ) - new_ids = df[~df["store_id"].isin(store_id_map["store_id"])]["store_id"].unique() - if len(new_ids) > 0: - logger.warning(f"Found new store ids: {len(new_ids)}") - raise 
ValueError("New store ids found in S3 app history data") - df["store_app"] = df["store_id"].map( - store_id_map.set_index("store_id")["id"].to_dict() - ) + df = estimate_ios_installs(df) df = df.convert_dtypes(dtype_backend="pyarrow") df = df.replace({pd.NA: None}) return df @@ -297,10 +377,12 @@ def manual_import_app_metrics_from_s3( use_ssh_tunnel=use_tunnel, config_key="madrone" ) - start_date = datetime.datetime.fromisoformat("2025-10-01").date() + start_date = datetime.datetime.fromisoformat("2026-01-21").date() + end_date = datetime.datetime.today().date() for snapshot_date in pd.date_range(start_date, end_date, freq="D"): snapshot_date = snapshot_date.date() - for store in [1, 2]: + # for store in [1, 2]: + for store in [2]: try: process_app_metrics_to_db(database_connection, store, snapshot_date) except: @@ -319,6 +401,7 @@ def import_app_metrics_from_s3( logger.error( f"Error processing S3 app metrics for {snapshot_date} {store}: {e}" ) + import_all_app_global_metrics_weekly(database_connection) def process_app_metrics_to_db( @@ -338,7 +421,7 @@ def process_app_metrics_to_db( problem_rows = df["store_id"].str.contains(".0") if problem_rows.any(): logger.warning( - f'Apple App IDs: Found {problem_rows.sum()} store_id with ".0" suffix, fixing' + f'Apple App IDs: Found {problem_rows.sum()}/{df.shape[0]} store_id with ".0" suffix, fixing' ) df.loc[problem_rows, "store_id"] = ( df.loc[problem_rows, "store_id"].str.split(".").str[0] @@ -349,24 +432,20 @@ def process_app_metrics_to_db( ) logger.info(f"date={snapshot_date}, store={store} agg df prep") df = prep_app_metrics_history( - df=df, store=store, database_connection=database_connection + df=df, + store=store, + database_connection=database_connection, + snapshot_date=snapshot_date, ) if not df[df["store_id"].isna()].empty: - # Why are there many records with missing store_id? + # Why are there any records with missing store_id? 
logger.warning("Found records with missing store_id") raise ValueError("Records with missing store_id found in S3 app history data") check_for_duplicates( df=df, key_columns=COUNTRY_HISTORY_KEYS, ) - # TESTING ONLY, ignore new apps since devdb is not updated - df = df[df["store_app"].notna()] insert_columns = [x for x in COUNTRY_HISTORY_COLS if x in df.columns] - if store == 1: - # TODO: Can get installs per Country by getting review_count sum for all countries - # Cannot do with this data set since it is snapshot_date only - # and not every app for every country crawled on this date - insert_columns = [x for x in insert_columns if x != "installs"] logger.info(f"date={snapshot_date}, store={store} agg df country upsert") upsert_df( df=df, @@ -375,26 +454,73 @@ def process_app_metrics_to_db( key_columns=COUNTRY_HISTORY_KEYS, insert_columns=insert_columns, ) + tier_installs = df.pivot_table( + index=GLOBAL_HISTORY_KEYS, + columns="tier", + values="installs_est", + aggfunc="sum", + fill_value=0, + ) + tier_pct = tier_installs.div(tier_installs.sum(axis=1), axis=0) + tier_pct = tier_pct.add_suffix("_pct") + tier_pct = tier_pct.fillna(0) if store == 1: - df = df[df["country"] == "US"] - df[STAR_COLS] = df["histogram"].apply(pd.Series) + global_reviews = df.groupby(GLOBAL_HISTORY_KEYS)["review_count"].sum() + df = df[df["country"] == "US"].copy() + df = df.set_index(GLOBAL_HISTORY_KEYS) + df["us_review_count"] = df["review_count"] + df["review_count"] = global_reviews + hist_df = pd.DataFrame(df["histogram"].tolist(), index=df.index) + hist_df.columns = STAR_COLS + df = pd.concat([df, hist_df], axis=1) if store == 2: - weighted_sum = ( - (df["rating"] * df["rating_count"]) - .groupby([df[k] for k in GLOBAL_HISTORY_KEYS]) - .sum() + ios_app_country_history = get_latest_app_country_history( + database_connection, + snapshot_date=snapshot_date, + days_back=180, + chunk_size=5000, + store_app_ids=df["store_app"].unique(), ) - weight_total = ( - 
df["rating_count"].groupby([df[k] for k in GLOBAL_HISTORY_KEYS]).sum() + # Should overwrite existing values if there, but could be removed when all rerun + ios_app_country_history = estimate_ios_installs(ios_app_country_history) + # For the group by this will need to be the snapshot_date being calculated for + ios_app_country_history["snapshot_date"] = pd.to_datetime(snapshot_date) + df = pd.concat([ios_app_country_history, df], axis=0) + df = df.sort_values(by=["crawled_date"], ascending=True).drop_duplicates( + subset=["store_app", "country_id"], keep="last" ) + countries_map = query_countries(database_connection) + df = df.merge( + countries_map[["id", "tier"]], + left_on="country_id", + right_on="id", + how="left", + validate="m:1", + ) + # weighted_sum = ( + # (df["rating"] * df["rating_count"]) + # .groupby([df[k] for k in GLOBAL_HISTORY_KEYS]) + # .sum() + # ) + # weight_total = ( + # df["rating_count"].groupby([df[k] for k in GLOBAL_HISTORY_KEYS]).sum() + # ) + df["rating_prod"] = df["rating"] * df["rating_count"] df = df.groupby(GLOBAL_HISTORY_KEYS).agg( rating_count=("rating_count", "sum"), + rating_prod=("rating_prod", "sum"), store_last_updated=("store_last_updated", "max"), **{col: (col, "sum") for col in STAR_COLS}, ) - df["rating"] = weighted_sum / weight_total - df["rating"] = df["rating"].astype("float64") - df = df.reset_index() + # df["rating"] = weighted_sum / weight_total + df["rating"] = ( + df["rating_prod"] / df["rating_count"].replace(0, np.nan) + ).astype("float64") + # df["rating"] = df["rating"].astype("float64") + df = df.join(tier_pct) + df = df.reset_index() + for col in ["tier1_pct", "tier2_pct", "tier3_pct"]: + df[col] = (df[col] * 10000).round().astype("int16") check_for_duplicates( df=df, key_columns=GLOBAL_HISTORY_KEYS, @@ -542,6 +668,7 @@ def import_ranks_from_s3( period=period, s3_config_key=s3_config_key, ) + clean_app_ranks_weekly_table(database_connection) def process_ranks_from_s3( @@ -615,9 +742,10 @@ def 
process_ranks_from_s3( store_id_map.set_index("store_id")["id"].to_dict() ) wdf = wdf.drop(columns=["store_id", "collection", "category"]) + table_name = f"store_app_ranks_{table_suffix}" upsert_df( df=wdf, - table_name=f"store_app_ranks_{table_suffix}", + table_name=table_name, schema="frontend", database_connection=database_connection, key_columns=[ @@ -730,19 +858,24 @@ def import_keywords_from_s3( crawl_source="keywords", store=store, ) - store_id_map = query_store_id_map_cached(database_connection, store) + store_id_map = query_store_id_map(database_connection, store) df["store_app"] = df["store_id"].map( store_id_map.set_index("store_id")["id"].to_dict() ) + df["store"] = store + logger.info( + f"Keywords from S3 insert {snapshot_date} {store=} {df.shape[0]} rows" + ) delete_and_insert( df=df, table_name="app_keyword_ranks_daily", schema="frontend", database_connection=database_connection, - delete_by_keys=["crawled_date"], + delete_by_keys=["crawled_date", "store"], insert_columns=[ "country", "keyword_id", + "store", "crawled_date", "store_app", "app_rank", @@ -786,3 +919,145 @@ def query_keywords_from_s3( """ duckdb_con = get_duckdb_connection(s3_config_key) return duckdb_con.execute(period_query).df() + + +def get_next_app_global_metrics_weekly( + database_connection: PostgresCon, df: pd.DataFrame +) -> pd.DataFrame: + """Process app global metrics weekly.""" + star_cols = ["one_star", "two_star", "three_star", "four_star", "five_star"] + metrics = ["installs", "rating", "review_count", "rating_count", *star_cols] + xaxis_col = "snapshot_date" + # Convert to date to datetime and sort by country and date, required + df[xaxis_col] = pd.to_datetime(df[xaxis_col]) + df = df.sort_values(["store_app", xaxis_col]) + df = ( + df.set_index(xaxis_col) + .groupby(["store_app", "store", "app_category"])[metrics] + .resample("W-MON") + .last() + .apply(pd.to_numeric, errors="coerce") + .interpolate(method="linear", limit_direction="forward") + .fillna(0) + 
.reset_index() + ) + df["installs_diff"] = df.groupby("store_app")["installs"].diff().fillna(0) + df["installs_diff"] = ( + df.groupby("store_app")["installs"].diff().fillna(df["installs"]).fillna(0) + ) + retention_benchmarks = get_retention_benchmarks(database_connection) + merged_df = df.merge(retention_benchmarks, on=["app_category", "store"], how="left") + merged_df["k"] = np.log( + merged_df["d30"].replace(0, np.nan) / merged_df["d7"].replace(0, np.nan) + ) / np.log(30.0 / 7.0) + cohorts = merged_df[ + ["store_app", "snapshot_date", "installs_diff", "d1", "d7", "k"] + ].copy() + logger.info("Historical merge, memory intensive step") + ddf = cohorts.merge( + cohorts[["store_app", "snapshot_date", "installs_diff"]], + on="store_app", + suffixes=("", "_historical"), + ) + ddf = ddf[ddf["snapshot_date"] >= ddf["snapshot_date_historical"]] + ddf["weeks_passed"] = ( + (ddf["snapshot_date"] - ddf["snapshot_date_historical"]).dt.days / 7 + ).astype(int) + wau_mult = 2.0 + ddf["retention_rate"] = np.where( + ddf["weeks_passed"] == 0, + 1.0, + (ddf["d7"] * wau_mult * (ddf["weeks_passed"].replace(0, 1) ** ddf["k"])).clip( + upper=1.0 + ), + ) + ddf["surviving_users"] = ddf["installs_diff_historical"] * ddf["retention_rate"] + mau_mult = 3.5 # Standard estimate for Monthly Reach + # Calculate MAU Retention Rate + ddf["retention_rate_mau"] = np.where( + ddf["weeks_passed"] == 0, + 1.0, + (ddf["d7"] * mau_mult * (ddf["weeks_passed"].replace(0, 1) ** ddf["k"])).clip( + upper=1.0 + ), + ) + ddf["surviving_mau"] = ddf["installs_diff_historical"] * ddf["retention_rate_mau"] + ddf = ( + ddf.groupby(["store_app", "snapshot_date"])[ + ["surviving_users", "surviving_mau"] + ] + .sum() + .reset_index() + ) + ddf.rename(columns={"surviving_users": "wau", "surviving_mau": "mau"}, inplace=True) + cols = ["store_app", "snapshot_date", "installs_diff"] + metrics + df = pd.merge( + df[cols], ddf, on=["store_app", "snapshot_date"], how="left", validate="1:1" + ) + logger.info("Finished 
calculating WAU") + df["weekly_ratings"] = ( + df.groupby("store_app")["rating_count"].diff().fillna(df["rating_count"]) + ) + df["weekly_reviews"] = ( + df.groupby("store_app")["review_count"].diff().fillna(df["review_count"]) + ) + rename_map = { + "snapshot_date": "week_start", + "installs_diff": "weekly_installs", + "wau": "weekly_active_users", + "mau": "monthly_active_users", + "installs": "total_installs", + "rating_count": "total_ratings_count", + } + df = df.rename(columns=rename_map) + df["weekly_iap_revenue"] = 0.0 + df["weekly_ad_revenue"] = 0.0 + final_cols = [ + "store_app", + "week_start", + "weekly_installs", + "weekly_ratings", + "weekly_reviews", + "weekly_active_users", + "monthly_active_users", + "weekly_iap_revenue", + "weekly_ad_revenue", + "total_installs", + "total_ratings_count", + "rating", + "one_star", + "two_star", + "three_star", + "four_star", + "five_star", + ] + df = df[final_cols] + return df + + +def import_all_app_global_metrics_weekly(database_connection: PostgresCon) -> None: + i = 0 + while True: + logger.info(f"batch {i} of app global metrics weekly start") + df = query_apps_to_process_global_metrics(database_connection, batch_size=5000) + apps = df["store_app"].tolist() + df = get_next_app_global_metrics_weekly(database_connection, df) + if df.empty: + break + upsert_df( + df=df, + table_name="app_global_metrics_weekly", + database_connection=database_connection, + key_columns=["store_app", "week_start"], + insert_columns=df.columns.tolist(), + ) + log_df = pd.DataFrame({"store_app": apps}) + log_df["updated_at"] = datetime.datetime.now() + log_df.to_sql( + name="app_global_metrics_weekly", + con=database_connection.engine, + schema="logging", + if_exists="append", + index=False, + ) + i += 1 diff --git a/adscrawler/app_stores/process_keywords.py b/adscrawler/app_stores/process_keywords.py index 847ef656..e8add5a8 100644 --- a/adscrawler/app_stores/process_keywords.py +++ b/adscrawler/app_stores/process_keywords.py @@ -6,6 
+6,7 @@ import pandas as pd +from adscrawler.config import get_logger from adscrawler.dbcon.connection import PostgresCon from adscrawler.dbcon.queries import ( delete_and_insert, @@ -15,8 +16,6 @@ upsert_df, ) -from adscrawler.config import get_logger - logger = get_logger(__name__) # Custom stopwords to remove personal pronouns & other irrelevant words @@ -37,8 +36,14 @@ "we", "application", "one", + "ones", + "dont", + "us", + "takes", + "take", "them", "use", + "uses", "need", "get", "who", @@ -73,23 +78,79 @@ def clean_text(text: str) -> str: return re.sub(r"[^a-zA-Z\s]", ". ", text.lower()) -def clean_df_text(df: pd.DataFrame, column: str) -> pd.DataFrame: - # Note these are same as clean_text function +def clean_series_text(s: pd.Series) -> pd.Series: + import emoji + + # 1. Replace emojis with periods + # emoji_regex = re.compile( + # "|".join(re.escape(e) for e in emoji.EMOJI_DATA.keys()) + # ) + # s = s.str.replace(emoji_regex, ". ", regex=True) + s = s.map(lambda x: emoji.replace_emoji(x, replace=". ")) + logger.info("Replaced emojis with periods") + + # 2. Convert all structural separators and whitespace-like noise to periods + # This handles \r, \n, \t, \xa0, and bullets in one pass + s = s.str.replace(r"[\r\n\t\xa0•]+", ". ", regex=True) + + # 3. Handle Apostrophes and Hyphens specifically (don't turn them into periods) + s = s.str.replace(r"['’]", "", regex=True) + s = s.str.replace(r"-", " ", regex=True) + + # 4. Remove URLs + s = s.str.replace(r"\b(?:http|www)\S*", "", regex=True, flags=re.IGNORECASE) + + # 5. Replace everything else that isn't a letter or space with a period + # We remove \s from the exclusion so newlines/tabs are finally nuked if any remain + s = s.str.replace(r"[^a-zA-Z ]", ". ", regex=True) + + # 6. Final Cleanup: lowercase and collapse multiple spaces/periods + # "Design. . . . Community" -> "design. 
community" + s = ( + s.str.lower() + .str.replace(r"\s+", " ", regex=True) + .str.replace(r"\.+", ".", regex=True) + ) + + return s + + +def clean_df_text(df: pd.DataFrame, column: str, lang="en") -> pd.DataFrame: + import emoji + + s = df[column].astype(str) + + # 1. Replace emojis with periods + # emoji_regex = re.compile( + # "|".join(re.escape(e) for e in emoji.EMOJI_DATA.keys()) + # ) + # s = s.str.replace(emoji_regex, ". ", regex=True) + s = s.map(lambda x: emoji.replace_emoji(x, replace=". ")) + logger.info("Replaced emojis with periods") + + # 2. Convert all structural separators and whitespace-like noise to periods + # This handles \r, \n, \t, \xa0, and bullets in one pass + s = s.str.replace(r"[\r\n\t\xa0•]+", ". ", regex=True) + + # 3. Handle Apostrophes and Hyphens specifically (don't turn them into periods) + s = s.str.replace(r"['’]", "", regex=True) + s = s.str.replace(r"-", " ", regex=True) + + # 4. Remove URLs + s = s.str.replace(r"\b(?:http|www)\S*", "", regex=True, flags=re.IGNORECASE) + + # 5. Replace everything else that isn't a letter or space with a period + # We remove \s from the exclusion so newlines/tabs are finally nuked if any remain + s = s.str.replace(r"[^a-zA-Z ]", ". ", regex=True) + + # 6. Final Cleanup: lowercase and collapse multiple spaces/periods + # "Design. . . . Community" -> "design. community" df[column] = ( - df[column] - .str.replace("\r", ". ") - .replace("\n", ". ") - .replace("\t", ". ") - .replace("\xa0", ". ") - .replace("•", ". ") - .replace("'", "") - .replace("’", "") - .replace("-", " ") - .replace(r"\bhttp\S*", "", regex=True) - .replace(r"\bwww\S*", "", regex=True) - .replace(r"[^a-zA-Z\s]", ". 
", regex=True) - .str.lower() + s.str.lower() + .str.replace(r"\s+", " ", regex=True) + .str.replace(r"\.+", ".", regex=True) ) + return df @@ -170,12 +231,24 @@ def extract_keywords_nltk(text: str, top_n: int = 10) -> list[str]: return [word for word, freq in word_freq.most_common(top_n)] +def get_stopwords() -> set[str]: + """Get the stopwords from NLTK and spaCy.""" + import spacy + from nltk.corpus import stopwords + + nlp = spacy.load("en_core_web_sm") + spacy_stopwords = nlp.Defaults.stop_words + mystopwords = ( + set(stopwords.words("english")).union(CUSTOM_STOPWORDS).union(spacy_stopwords) + ) + return list(mystopwords) + + def extract_keywords_rake(text: str, top_n: int = 10, max_tokens: int = 3) -> list[str]: """Extracts keywords using RAKE with token limit.""" - from nltk.corpus import stopwords from rake_nltk import Rake - mystopwords = set(stopwords.words("english")).union(CUSTOM_STOPWORDS) + mystopwords = get_stopwords() r = Rake() r.extract_keywords_from_text(text) @@ -197,9 +270,8 @@ def extract_unique_app_keywords_from_text( max_tokens: int = 1, ) -> list[str]: """Extracts keywords using spaCy, NLTK, and RAKE, then returns a unique set.""" - from nltk.corpus import stopwords - mystopwords = set(stopwords.words("english")).union(CUSTOM_STOPWORDS) + mystopwords = get_stopwords() text = clean_text(text) words_spacy = extract_keywords_spacy(text, top_n, max_tokens) @@ -229,27 +301,69 @@ def extract_unique_app_keywords_from_text( return combined_keywords +def pos_filter_descriptions(texts: pd.Series, batch_size=1000) -> list[str]: + # Load light model; disable everything but the tagger (for POS) and lemmatizer + import spacy + import tqdm + + nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"]) + processed_texts = [] + # for doc in nlp.pipe(texts, batch_size=batch_size, n_process=4): + for doc in tqdm.tqdm( + nlp.pipe(texts, batch_size=batch_size, n_process=12), total=len(texts) + ): + # Keep only Nouns, Proper Nouns, and Adjectives + tokens = 
[ + token.lemma_.lower() # Use lemma to group 'games' and 'game' + for token in doc + if token.pos_ in {"NOUN", "PROPN", "ADJ"} and not token.is_stop + ] + processed_texts.append(" ".join(tokens)) + return processed_texts + + def get_global_keywords(database_connection: PostgresCon) -> list[str]: """Get the global keywords from the database. NOTE: This takes about ~5-8GB of RAM for 50k keywords and 200k descriptions. For now run manually. """ - from nltk.corpus import stopwords - mystopwords = set(stopwords.words("english")).union(CUSTOM_STOPWORDS) + from multiprocessing import Pool + + from sklearn.feature_extraction.text import TfidfVectorizer # noqa: PLC0415 + + mystopwords = get_stopwords() + df = query_all_store_app_descriptions( language_slug="en", database_connection=database_connection ) - df = clean_df_text(df, "description") + # df = pd.read_pickle("descriptions_df.pkl") - from sklearn.feature_extraction.text import TfidfVectorizer # noqa: PLC0415 + # Number of chunks + n_chunks = 8 + chunk_size = len(df) // n_chunks + 1 + chunks = [ + df["description"].iloc[i * chunk_size : (i + 1) * chunk_size] + for i in range(n_chunks) + ] + with Pool(n_chunks) as pool: + cleaned_chunks = pool.map(clean_series_text, chunks) + # Combine back into one Series + df["description"] = pd.concat(cleaned_chunks).reset_index(drop=True) + + # df.to_pickle("descriptions_df_cleaned.pkl") + # df = pd.read_pickle("descriptions_df_cleaned.pkl") + + df["description"] = pos_filter_descriptions(df["description"]) + # df.to_pickle("descriptions_df_cleaned_pos_filtered.pkl") + # df = pd.read_pickle("descriptions_df_cleaned_pos_filtered.pkl") vectorizer = TfidfVectorizer( - ngram_range=(1, 2), # Include 1-grams, 2-grams + ngram_range=(1, 3), # Include 1-grams, 2-grams, 3-grams stop_words=list(mystopwords), - max_df=0.75, # Ignore terms in >75% of docs (too common) - min_df=300, # Ignore terms in list[str]: keyword_scores = list(zip(feature_names, global_scores, strict=False)) 
keyword_scores.sort(key=lambda x: x[1], reverse=True) global_keywords = [kw for kw, score in keyword_scores if kw not in mystopwords] - return global_keywords + return df, global_keywords def insert_global_keywords(database_connection: PostgresCon) -> None: @@ -286,6 +400,10 @@ def insert_global_keywords(database_connection: PostgresCon) -> None: table_name = "keywords_base" insert_columns = ["keyword_id"] key_columns = ["keyword_id"] + + keywords_df.head() + + # Replace old base keywords keywords_df.to_sql( name=table_name, con=database_connection.engine, @@ -311,7 +429,9 @@ def extract_app_keywords_from_descriptions( database_connection: PostgresCon, limit: int ) -> None: """Process keywords for app descriptions.""" + description_df = query_apps_to_process_keywords(database_connection, limit=limit) + keywords_base = query_keywords_base(database_connection) keywords_base["keyword_text"] = ( " " + keywords_base["keyword_text"].str.lower() + " " @@ -323,7 +443,9 @@ def extract_app_keywords_from_descriptions( + description_df["description"] + " " ).str.lower() - description_df = clean_df_text(description_df, "description_text") + description_df["description_text"] = clean_series_text( + description_df["description_text"] + ) all_keywords_dfs = [] logger.info(f"Processing {len(description_df)} app descriptions") for _i, row in description_df.iterrows(): @@ -357,3 +479,15 @@ def extract_app_keywords_from_descriptions( delete_by_keys=["store_app"], delete_keys_have_duplicates=True, ) + table_name = "app_description_keywords_extracted" + apps_extracted_df = main_keywords_df[ + ["store_app", "description_id", "extracted_at"] + ].drop_duplicates() + apps_extracted_df["extracted_at"] = apps_extracted_df["extracted_at"] + apps_extracted_df.to_sql( + name=table_name, + con=database_connection.engine, + if_exists="append", + index=False, + schema="logging", + ) diff --git a/adscrawler/app_stores/scrape_stores.py b/adscrawler/app_stores/scrape_stores.py index cb3741fb..1936f8ec 
100644 --- a/adscrawler/app_stores/scrape_stores.py +++ b/adscrawler/app_stores/scrape_stores.py @@ -1,7 +1,9 @@ import datetime import pathlib +import random +import ssl import time -from concurrent.futures import ProcessPoolExecutor, as_completed +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed from io import BytesIO from urllib.error import URLError from urllib.parse import unquote_plus @@ -55,7 +57,6 @@ query_languages, query_store_apps_to_update, query_store_id_map, - query_store_id_map_cached, query_store_ids, update_from_df, upsert_df, @@ -65,45 +66,135 @@ logger = get_logger(__name__, "scrape_stores") -def process_chunk( +def _scrape_single_app( + row: pd.Series, + store: int, + process_icon: bool, + chunk_info: str, + use_thread_jitter: bool, +) -> dict | None: + """Helper function to scrape a single app - used by ThreadPoolExecutor.""" + if use_thread_jitter: + # Add small random jitter to avoid SSL connection conflicts + time.sleep(random.uniform(0.05, 0.2)) + + try: + result = scrape_app( + store=store, + store_id=row["store_id"], + country=row["country_code"].lower(), + language=row["language"].lower(), + html_last_scraped_at=row.get("html_last_scraped_at", None), + ) + result["store_app_db_id"] = row["store_app"] + if process_icon: + result["icon_url_100"] = row.get("icon_url_100", None) + return result + except Exception as e: + logger.exception( + f"{chunk_info} store_id={row['store_id']} scrape_app failed: {e}" + ) + return None + + +def process_scrape_apps_and_save( df_chunk: pd.DataFrame, store: int, use_ssh_tunnel: bool, process_icon: bool, - total_rows: int, + thread_workers: int, + total_rows: int | None = None, ) -> None: - chunk_info = f"{store=} chunk={df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}" - logger.info(f"{chunk_info} start") + """Process a chunk of apps, scrape app, store to S3 and if country === US store app details to db store_apps table. 
+ + Args: + df_chunk: DataFrame of apps to process, needs to have columns: store_id, country_code, language, icon_url_100 + store: Store ID + use_ssh_tunnel: Whether to use SSH tunnel + process_icon: Whether to process app icons + thread_workers: Number of threads to use for parallel scraping within this process + total_rows: Total number of apps in the chunk, if None, will be calculated from df_chunk + """ + if total_rows is None: + total_rows = len(df_chunk) + chunk_info = f"{store=} process_scrape_apps_and_save chunk={df_chunk.index[0]}-{df_chunk.index[-1]}/{total_rows}" + + # Store 1 (Google Play) uses sequential processing to avoid SSL issues + # Store 2 (Apple) can use threading + use_threading = store == 2 and thread_workers > 1 + + if use_threading: + logger.info(f"{chunk_info} start with {thread_workers} threads") + else: + logger.info(f"{chunk_info} start (sequential)") + database_connection = get_db_connection(use_ssh_tunnel=use_ssh_tunnel) chunk_results = [] + try: - for _, row in df_chunk.iterrows(): - try: - result = scrape_app( - store=store, - store_id=row["store_id"], - country=row["country_code"].lower(), - language=row["language"].lower(), - ) - chunk_results.append(result) - except Exception as e: - logger.exception( - f"{chunk_info} store_id={row['store_id']} scrape_app failed: {e}" - ) + if use_threading: + # Threading approach for Apple App Store + use_thread_jitter = True + with ThreadPoolExecutor(max_workers=thread_workers) as executor: + # Submit all scraping tasks + future_to_row = { + executor.submit( + _scrape_single_app, + row, + store, + process_icon, + chunk_info, + use_thread_jitter, + ): idx + for idx, row in df_chunk.iterrows() + } + + # Collect results as they complete + for future in as_completed(future_to_row): + try: + result = future.result() + if result is not None: + chunk_results.append(result) + # Add slight jitter so threads don't pick up next task simultaneously + time.sleep(random.uniform(0.01, 0.05)) + except 
Exception as e: + row_idx = future_to_row[future] + logger.exception( + f"{chunk_info} row_idx={row_idx} thread processing failed: {e}" + ) + else: + # Sequential approach for Google Play Store (avoids SSL EOF errors) + for _, row in df_chunk.iterrows(): + try: + result = scrape_app( + store=store, + store_id=row["store_id"], + country=row["country_code"].lower(), + language=row["language"].lower(), + ) + result["store_app_db_id"] = row["store_app"] + if process_icon: + result["icon_url_100"] = row.get("icon_url_100", None) + chunk_results.append(result) + except Exception as e: + logger.exception( + f"{chunk_info} store_id={row['store_id']} scrape_app failed: {e}" + ) + if not chunk_results: logger.warning(f"{chunk_info} produced no results.") return results_df = pd.DataFrame(chunk_results) results_df["crawled_date"] = results_df["crawled_at"].dt.date app_details_to_s3(results_df, store=store) - log_crawl_results(results_df, store, database_connection=database_connection) + results_df["store_app"] = results_df["store_app_db_id"].astype(int) + log_crawl_results(results_df, database_connection=database_connection) results_df = results_df[(results_df["country"] == "US")] process_live_app_details( store=store, results_df=results_df, database_connection=database_connection, process_icon=process_icon, - df_chunk=df_chunk, ) logger.info(f"{chunk_info} finished") finally: @@ -121,8 +212,27 @@ def update_app_details( limit: int, country_priority_group: int, ) -> None: - """Process apps with dynamic work queue - simple and efficient.""" - log_info = f"{store=} update app details" + """Process apps with dynamic work queue + + Args: + database_connection: Database connection + store: Store ID + use_ssh_tunnel: Whether to use SSH tunnel + workers: Number of processes to use + process_icon: Whether to process app icons + limit: Limit on number of apps to process + country_priority_group: Country priority group + """ + log_info = f"{store=} group={country_priority_group} 
update_app_details" + + # Store 1 (Google Play): No threading due to urllib SSL issues + # Store 2 (Apple): Use threading as it has slower response times + if store == 1: + thread_workers = 1 + elif store == 2: + thread_workers = 3 + else: + thread_workers = 1 df = query_store_apps_to_update( store=store, @@ -131,8 +241,14 @@ def update_app_details( country_priority_group=country_priority_group, ) df = df.sort_values("country_code").reset_index(drop=True) - logger.info(f"{log_info} start {len(df)} apps") + if df.empty: + logger.info(f"{log_info} no apps to update") + return + logger.info(f"{log_info} start apps={len(df)}") + # Keep chunk size large for efficient S3 parquet files + # For store 1 (sequential), processes provide parallelism + # For store 2 (threading), threads within chunks provide parallelism max_chunk_size = 3000 chunks = [] # Try keeping countries together for larger end S3 files @@ -149,21 +265,37 @@ def update_app_details( chunks.append(country_df.iloc[i : i + chunk_size]) total_chunks = len(chunks) total_rows = len(df) - logger.info(f"{log_info} processing {total_rows} apps in {total_chunks} chunks") + + if thread_workers > 1: + logger.info( + f"{log_info} processing {total_rows} apps in {total_chunks} chunks " + f"({workers} processes × {thread_workers} threads = {workers * thread_workers} concurrent)" + ) + else: + logger.info( + f"{log_info} processing {total_rows} apps in {total_chunks} chunks " + f"({workers} processes, sequential per process)" + ) completed_count = 0 failed_count = 0 with ProcessPoolExecutor(max_workers=workers) as executor: - # Submit all chunks, but stagger the first wave to avoid API thundering herd + # Submit all chunks, but stagger the first wave to avoid API bursts future_to_idx = {} for idx, df_chunk in enumerate(chunks): future = executor.submit( - process_chunk, df_chunk, store, use_ssh_tunnel, process_icon, total_rows + process_scrape_apps_and_save, + df_chunk=df_chunk, + store=store, + 
use_ssh_tunnel=use_ssh_tunnel, + process_icon=process_icon, + total_rows=total_rows, + thread_workers=thread_workers, ) future_to_idx[future] = idx # Only stagger the initial batch to avoid simultaneous API burst - if idx < workers: + if idx <= workers: time.sleep(0.5) # 500ms between initial worker starts logger.info(f"{log_info} all {total_chunks} chunks submitted") # Process results as they complete @@ -222,7 +354,6 @@ def crawl_keyword_ranks(database_connection: PostgresCon) -> None: def scrape_store_ranks(database_connection: PostgresCon, store: int) -> None: collections_map = query_collections(database_connection) categories_map = query_categories(database_connection) - countries_map = query_countries(database_connection) collections_map = collections_map.rename(columns={"id": "store_collection"}) categories_map = categories_map.rename(columns={"id": "store_category"}) ranks_country_list = get_crawl_scenario_countries( @@ -242,29 +373,34 @@ def scrape_store_ranks(database_connection: PostgresCon, store: int) -> None: crawl_source="scrape_frontpage_top", collections_map=collections_map, categories_map=categories_map, - countries_map=countries_map, store=2, ) except Exception as e: logger.exception( - f"Srape iOS collection={collection_keyword} hit error={e}, skipping", + f"Srape iOS collection={collection_keyword} {country=} hit error={e}, skipping", ) if store == 1: for country in country_codes: try: ranked_dicts = scrape_google_ranks(country=country) - process_scraped( - database_connection=database_connection, - ranked_dicts=ranked_dicts, - crawl_source="scrape_frontpage_top", - collections_map=collections_map, - categories_map=categories_map, - countries_map=countries_map, - store=1, - ) + if len(ranked_dicts) > 0: + process_scraped( + database_connection=database_connection, + ranked_dicts=ranked_dicts, + crawl_source="scrape_frontpage_top", + collections_map=collections_map, + categories_map=categories_map, + store=1, + ) + else: + logger.warning( + 
f"Scrape google ranks {country=} produced no results, skipping" + ) except Exception as e: - logger.exception(f"Scrape google ranks hit error={e}, skipping") + logger.exception( + f"Scrape google ranks {country=} hit error={e}, skipping" + ) try: dicts = get_apkcombo_android_apps() process_scraped( @@ -293,8 +429,23 @@ def scrape_keyword( keyword: str, ) -> pd.DataFrame: logger.info(f"{keyword=} start") + retry_delay = 0.5 + retry_delays = (0, retry_delay, 1.0) try: - google_apps = search_play_store(keyword, country=country, language=language) + google_apps = None + last_google_error = None + for delay in retry_delays: + try: + if delay: + time.sleep(delay) + google_apps = search_play_store( + keyword, country=country, language=language + ) + break + except Exception as exc: + last_google_error = exc + if google_apps is None and last_google_error is not None: + raise last_google_error gdf = pd.DataFrame(google_apps) gdf["store"] = 1 gdf["rank"] = range(1, len(gdf) + 1) @@ -302,9 +453,20 @@ def scrape_keyword( gdf = pd.DataFrame() logger.exception(f"{keyword=} google failed") try: - apple_apps = search_app_store_for_ids( - keyword, country=country, language=language - ) + apple_apps = None + last_apple_error = None + for delay in retry_delays: + try: + if delay: + time.sleep(delay) + apple_apps = search_app_store_for_ids( + keyword, country=country, language=language + ) + break + except Exception as exc: + last_apple_error = exc + if apple_apps is None and last_apple_error is not None: + raise last_apple_error adf = pd.DataFrame( { "store": 2, @@ -327,10 +489,9 @@ def process_scraped( database_connection: PostgresCon, ranked_dicts: list[dict], crawl_source: str, + store: int, collections_map: pd.DataFrame | None = None, categories_map: pd.DataFrame | None = None, - countries_map: pd.DataFrame | None = None, - store: int | None = None, ) -> None: check_and_insert_new_apps( database_connection=database_connection, @@ -346,17 +507,15 @@ def process_scraped( store, 
collections_map, categories_map, - countries_map, ) def save_app_ranks( df: pd.DataFrame, database_connection: PostgresCon, - store: int | None, + store: int, collections_map: pd.DataFrame | None = None, categories_map: pd.DataFrame | None = None, - countries_map: pd.DataFrame | None = None, ) -> None: all_scraped_ids = df["store_id"].unique().tolist() new_existing_ids_map = query_store_id_map( @@ -376,38 +535,6 @@ def save_app_ranks( store=store, df=df, ) - if "keyword" in df.columns: - df = df.drop("store_id", axis=1) - df = ( - pd.merge( - df, - countries_map[["id", "alpha2"]], - how="left", - left_on=["country"], - right_on="alpha2", - validate="m:1", - ) - .drop(["country", "alpha2"], axis=1) - .rename(columns={"id": "country"}) - ) - process_keyword_rankings( - df=df, - database_connection=database_connection, - ) - - -def process_keyword_rankings( - df: pd.DataFrame, - database_connection: PostgresCon, -) -> None: - upsert_df( - database_connection=database_connection, - df=df, - table_name="app_keyword_rankings", - key_columns=["crawled_date", "country", "lang", "keyword", "rank", "store_app"], - insert_columns=[], - ) - logger.info("keyword rankings inserted") def extract_domains(x: str) -> str: @@ -600,11 +727,21 @@ def scrape_from_store( store_id: str, country: str, language: str, + html_last_scraped_at: datetime.datetime | None = None, ) -> dict: if store == 1: result_dict = scrape_app_gp(store_id, country=country, language=language) elif store == 2: - result_dict = scrape_app_ios(store_id, country=country, language=language) + scrape_html = False + if country == "us" and ( + html_last_scraped_at is None + or html_last_scraped_at + < datetime.datetime.now(tz=datetime.UTC) - datetime.timedelta(days=30) + ): + scrape_html = True + result_dict = scrape_app_ios( + store_id, country=country, language=language, scrape_html=scrape_html + ) else: logger.error(f"Store not supported {store=}") return result_dict @@ -623,12 +760,15 @@ def scrape_app( store_id: 
str, country: str, language: str, + html_last_scraped_at: datetime.datetime | None = None, ) -> dict: - scrape_info = f"{store=}, {country=}, {language=}, {store_id=}" + scrape_info = f"{store=}, {country=}, {language=}, {store_id=} scrape_app" max_retries = 2 base_delay = 0.5 retries = 0 - logger.debug(f"{scrape_info} scrape start") + logger.debug(f"{scrape_info} start") + # Satisfy mypy + crawl_result = 0 while retries <= max_retries: retries += 1 try: @@ -637,6 +777,7 @@ def scrape_app( store_id=store_id, country=country, language=language, + html_last_scraped_at=html_last_scraped_at, ) crawl_result = 1 break # If successful, break out of the retry loop @@ -652,12 +793,13 @@ def scrape_app( crawl_result = 4 logger.exception(f"{scrape_info} unexpected error: {error=}") break - except URLError as error: - logger.warning(f"{scrape_info} {error=}") + except (URLError, ssl.SSLError, requests.exceptions.SSLError) as error: + logger.warning(f"{scrape_info} Network/SSL error: {error=}") crawl_result = 4 if retries <= max_retries: - sleep_time = base_delay * (2**retries) - logger.info(f"{scrape_info} Retrying in {sleep_time} seconds...") + # Add extra jitter for SSL errors to avoid connection conflicts + sleep_time = base_delay * (2**retries) + random.uniform(0.1, 0.5) + logger.info(f"{scrape_info} Retrying in {sleep_time:.2f} seconds...") time.sleep(sleep_time) continue else: @@ -678,7 +820,7 @@ def scrape_app( result_dict["store_id"] = store_id result_dict["queried_language"] = language.lower() result_dict["country"] = country.upper() - logger.debug(f"{scrape_info} result={crawl_result} scrape finished") + logger.info(f"{scrape_info} {crawl_result=} finished") return result_dict @@ -710,7 +852,6 @@ def process_live_app_details( results_df: pd.DataFrame, database_connection: PostgresCon, process_icon: bool, - df_chunk: pd.DataFrame, ) -> None: for crawl_result, apps_df in results_df.groupby("crawl_result"): logger.info(f"{store=} {crawl_result=} processing 
{len(apps_df)} apps for db") @@ -724,12 +865,6 @@ def process_live_app_details( apps_df.loc[apps_df["description_short"].isna(), "description_short"] = "" if process_icon: try: - apps_df = pd.merge( - apps_df, - df_chunk[["store_id", "icon_url_100"]], - on="store_id", - how="inner", - ) no_icon = apps_df["icon_url_100"].isna() if apps_df[no_icon].empty: pass @@ -744,9 +879,6 @@ def process_live_app_details( ) except Exception: logger.exception("failed to process app icon") - # I think only coming from S3? - # apps_df = apps_df.convert_dtypes(dtype_backend="pyarrow") - # apps_df = apps_df.replace({pd.NA: None}) apps_details_to_db( apps_df=apps_df, database_connection=database_connection, @@ -766,25 +898,16 @@ def apps_details_to_db( x for x in get_store_app_columns(database_connection) if x in apps_df.columns ] apps_df = prepare_for_psycopg(apps_df) - return_rows = crawl_result == 1 logger.info(f"{crawl_result=} update store_apps table for {len(apps_df)} apps") - store_apps_df = update_from_df( + update_from_df( table_name="store_apps", df=apps_df, update_columns=insert_columns, key_columns=key_columns, database_connection=database_connection, - return_rows=return_rows, ) - if store_apps_df is None or store_apps_df.empty or crawl_result != 1: + if apps_df is None or apps_df.empty or crawl_result != 1: return - store_apps_df = store_apps_df.rename(columns={"id": "store_app"}) - apps_df = pd.merge( - apps_df, - store_apps_df[["store_id", "store_app"]], - how="left", - validate="1:1", - ) upsert_store_apps_descriptions(apps_df, database_connection) save_app_domains( apps_df=apps_df, @@ -831,19 +954,11 @@ def upsert_store_apps_descriptions( ) -def log_crawl_results( - app_df: pd.DataFrame, store: int, database_connection: PostgresCon -) -> None: - store_id_map = query_store_id_map_cached( - store=store, database_connection=database_connection - ) +def log_crawl_results(app_df: pd.DataFrame, database_connection: PostgresCon) -> None: country_map = 
query_countries(database_connection) app_df["country_id"] = app_df["country"].map( country_map.set_index("alpha2")["id"].to_dict() ) - app_df["store_app"] = app_df["store_id"].map( - store_id_map.set_index("store_id")["id"].to_dict() - ) insert_columns = [ "crawl_result", "store_app", diff --git a/adscrawler/app_stores/utils.py b/adscrawler/app_stores/utils.py index f50cfa92..04357aec 100644 --- a/adscrawler/app_stores/utils.py +++ b/adscrawler/app_stores/utils.py @@ -25,7 +25,10 @@ def truncate_utf8_bytes(s: str, max_bytes: int = 2400) -> str: def check_and_insert_new_apps( - dicts: list[dict], database_connection: PostgresCon, crawl_source: str, store: int + dicts: list[dict], + database_connection: PostgresCon, + crawl_source: str, + store: int, ) -> None: df = pd.DataFrame(dicts) if store in [1, 2]: @@ -72,3 +75,4 @@ def check_and_insert_new_apps( database_connection=database_connection, schema="logging", ) + return None diff --git a/adscrawler/config.py b/adscrawler/config.py index 1b2a5001..23a107d6 100644 --- a/adscrawler/config.py +++ b/adscrawler/config.py @@ -1,4 +1,5 @@ import logging +import logging.handlers import os import pathlib import sys @@ -6,7 +7,6 @@ import traceback import typing from logging import Formatter -from logging.handlers import RotatingFileHandler HOME = pathlib.Path.home() PROJECT_NAME = "adscrawler" @@ -135,6 +135,22 @@ def check_dirs() -> None: global_loggers: dict[str, logging.Logger] = {} +def _truncate_oversized_log(log_file: str, max_size: int) -> None: + """Check log file size on startup and truncate if too large""" + if not os.path.exists(log_file): + return + size = os.path.getsize(log_file) + if size > max_size: + keep_size = int(max_size * 0.2) + with open(log_file, "rb") as f: + f.seek(-keep_size, os.SEEK_END) + f.readline() # Skip to next newline + content = f.read() + with open(log_file, "wb") as f: + warning = b"\n=== LOG TRUNCATED ON STARTUP (exceeded size limit) ===\n\n" + f.write(warning + content) + + def 
get_logger(mod_name: str, sep_file: str | None = "main") -> logging.Logger: global global_loggers # noqa: PLW0602 @@ -157,11 +173,12 @@ def get_logger(mod_name: str, sep_file: str | None = "main") -> logging.Logger: syslog_handler.ident = f"{sep_file}: " logger.addHandler(syslog_handler) - # Add fallback log file, but struggles in multi-process environments - indiv_handler = RotatingFileHandler( + _truncate_oversized_log( + os.path.join(LOG_DIR, f"{sep_file}.log"), 500 * 1024 * 1024 + ) # 50MB + + indiv_handler = logging.handlers.WatchedFileHandler( filename=os.path.join(LOG_DIR, f"{sep_file}.log"), - maxBytes=50 * 1024 * 1024, - backupCount=10, ) indiv_handler.setFormatter(FORMATTER) logger.addHandler(indiv_handler) diff --git a/adscrawler/dbcon/queries.py b/adscrawler/dbcon/queries.py index 5249171d..10e2c42d 100644 --- a/adscrawler/dbcon/queries.py +++ b/adscrawler/dbcon/queries.py @@ -7,7 +7,7 @@ import pandas as pd from psycopg import Connection from psycopg.sql import SQL, Composed, Identifier -from sqlalchemy import text +from sqlalchemy import bindparam, text from sqlalchemy.sql.elements import TextClause from adscrawler.config import CONFIG, SQL_DIR, get_logger @@ -25,6 +25,7 @@ def load_sql_file(file_name: str) -> TextClause: QUERY_APPS_TO_UPDATE_SECONDARY = load_sql_file("query_apps_to_update_secondary.sql") QUERY_APPS_TO_UPDATE_PRIMARY = load_sql_file("query_apps_to_update_primary.sql") +QUERY_APPS_TO_UPDATE_ANY_NEW = load_sql_file("query_apps_to_update_any_new.sql") QUERY_APPS_TO_DOWNLOAD = load_sql_file("query_apps_to_download.sql") QUERY_APPS_TO_SDK_SCAN = load_sql_file("query_apps_to_sdk_scan.sql") QUERY_APPS_TO_API_SCAN = load_sql_file("query_apps_to_api_scan.sql") @@ -33,6 +34,7 @@ def load_sql_file(file_name: str) -> TextClause: QUERY_APPS_MITM_IN_S3 = load_sql_file("query_apps_mitm_in_s3.sql") QUERY_ZSCORES = load_sql_file("query_simplified_store_app_z_scores.sql") QUERY_APPS_TO_PROCESS_KEYWORDS = 
load_sql_file("query_apps_to_process_keywords.sql") +QUERY_APPS_TO_PROCESS_METRICS = load_sql_file("query_apps_to_process_metrics.sql") def insert_df( @@ -126,6 +128,7 @@ def prepare_for_psycopg(df: pd.DataFrame) -> pd.DataFrame: df = df.copy() for col in df.select_dtypes(include=["datetimetz", "datetime64[ns]"]): # Convert to object dtype first so it can hold None + # Note: This may be breaking in pandas3.0 df[col] = ( df[col] .apply(lambda x: x.to_pydatetime() if pd.notna(x) else None) @@ -384,9 +387,11 @@ def upsert_df( if return_rows: if len(key_columns) == 1 and key_columns[0] in (md5_key_columns or []): md5_values = [ - hashlib.md5(v.encode("utf-8")).hexdigest() - if v is not None - else None + ( + hashlib.md5(v.encode("utf-8")).hexdigest() + if v is not None + else None + ) for v in df[key_columns[0]].tolist() ] where_values = (md5_values,) @@ -403,6 +408,34 @@ def upsert_df( return return_df +def clean_app_ranks_weekly_table(database_connection: PostgresCon): + # Use a smaller limit to prevent long locks + batch_size = 100000 + del_query = f""" + DELETE FROM frontend.store_app_ranks_weekly + WHERE ctid IN ( + SELECT ctid FROM frontend.store_app_ranks_weekly + WHERE crawled_date < CURRENT_DATE - INTERVAL '14 days' + AND EXTRACT(DOW FROM crawled_date) != 1 + LIMIT {batch_size} + ); + """ + raw_conn = database_connection.engine.raw_connection() + # Ensure we aren't in an implicit transaction block that stays open + raw_conn.set_session(autocommit=True) + try: + with raw_conn.cursor() as cur: + while True: + cur.execute(del_query) + rows_affected = cur.rowcount + print(f"Deleted {rows_affected} rows...") + + if rows_affected == 0: + break + finally: + raw_conn.close() + + def delete_and_insert( df: pd.DataFrame, table_name: str, @@ -458,7 +491,7 @@ def delete_and_insert( def query_all_developers(database_connection: PostgresCon) -> pd.DataFrame: """Query all developers from the database.""" sel_query = """SELECT - id, store, name, developer_id + id, store, 
developer_id FROM developers ; """ @@ -636,7 +669,7 @@ def query_latest_api_scan_by_store_id( return df -def upsert_details_df( +def upser_sdk_details_df( details_df: pd.DataFrame, database_connection: PostgresCon, store_id: str, @@ -766,9 +799,10 @@ def query_categories(database_connection: PostgresCon) -> pd.DataFrame: @lru_cache(maxsize=1) def query_countries(database_connection: PostgresCon) -> pd.DataFrame: sel_query = """SELECT - * + c.*, t.tier_slug as tier FROM - countries + countries c + LEFT JOIN public.tiers t on c.tier_id = t.id ; """ df = pd.read_sql(sel_query, database_connection.engine) @@ -875,23 +909,35 @@ def query_pub_domains_to_crawl_ads_txt( return df -def query_urls_id_map( - urls: list[str], database_connection: PostgresCon -) -> pd.DataFrame: - """ - Get URL IDs by looking up MD5 hashes. - Returns DataFrame with columns: url, id +@lru_cache(maxsize=1) +def query_urls_hash_map_cached(database_connection: PostgresCon) -> pd.DataFrame: """ - if not urls: - return pd.DataFrame(columns=["url", "id"]) - url_hash_map = {url: hashlib.md5(url.encode()).hexdigest() for url in urls} - hashes_str = "'" + "','".join(url_hash_map.values()) + "'" - sel_query = f""" - SELECT url, id, url_hash - FROM adtech.urls - WHERE url_hash IN ({hashes_str}) + Get URL IDs and hashes from the urls table. 
+ Returns DataFrame with columns: url_id, url """ + sel_query = """SELECT id, url_hash FROM adtech.urls""" df = pd.read_sql(sel_query, database_connection.engine) + df = df.rename(columns={"id": "url_id"}) + return df + + +def query_urls_by_hashes( + hashes: list[str], database_connection: PostgresCon +) -> pd.DataFrame: + hashes_tuple = tuple(hashes) + if not hashes_tuple: + return pd.DataFrame(columns=["id", "url_hash"]) + return _query_urls_by_hashes_cached(hashes_tuple, database_connection) + + +@lru_cache(maxsize=1000) +def _query_urls_by_hashes_cached( + hashes: tuple[str, ...], database_connection: PostgresCon +) -> pd.DataFrame: + sel_query = text( + """SELECT id, url_hash FROM adtech.urls WHERE url_hash IN :hashes""" + ).bindparams(bindparam("hashes", expanding=True)) + df = pd.read_sql(sel_query, database_connection.engine, params={"hashes": hashes}) return df @@ -1003,10 +1049,12 @@ def query_store_apps_to_update( "year_ago_ts": year_ago_ts, "mylimit": limit, } - if country_priority_group == 1: - query = QUERY_APPS_TO_UPDATE_PRIMARY - else: - query = QUERY_APPS_TO_UPDATE_SECONDARY + country_priority_group_query = { + -1: QUERY_APPS_TO_UPDATE_ANY_NEW, + 1: QUERY_APPS_TO_UPDATE_PRIMARY, + 2: QUERY_APPS_TO_UPDATE_SECONDARY, + } + query = country_priority_group_query[country_priority_group] if log_query: # Compile and print the query with parameters @@ -1158,6 +1206,18 @@ def query_apps_to_process_keywords( return df +def query_apps_to_process_global_metrics( + database_connection: PostgresCon, batch_size: int = 10000 +) -> pd.DataFrame: + """Query apps to process metrics.""" + df = pd.read_sql( + QUERY_APPS_TO_PROCESS_METRICS, + con=database_connection.engine, + params={"batch_size": batch_size}, + ) + return df + + def query_apps_mitm_in_s3(database_connection: PostgresCon) -> pd.DataFrame: df = pd.read_sql( QUERY_APPS_MITM_IN_S3, @@ -1345,7 +1405,8 @@ def get_failed_mitm_logs(database_connection: PostgresCon) -> pd.DataFrame: sel_query = """WITH 
last_run_result AS (SELECT DISTINCT ON (run_id) run_id, pub_store_id, error_msg, inserted_at FROM logging.creative_scan_results - ORDER BY run_id, inserted_at DESC) + ORDER BY run_id, inserted_at DESC + ) SELECT * FROM last_run_result WHERE error_msg like 'CRITICAL %%' @@ -1422,3 +1483,153 @@ def get_all_mmp_tlds(database_connection: PostgresCon) -> pd.DataFrame: """ df = pd.read_sql(sel_query, con=database_connection.engine) return df + + +import pandas as pd + + +def get_latest_app_country_history( + database_connection: PostgresCon, + snapshot_date: datetime.date, + store_app_ids: list, # The 200k IDs from your DuckDB/S3 query + days_back: int = 30, + chunk_size: int = 5000, +) -> pd.DataFrame: + start_date = (snapshot_date - datetime.timedelta(days=days_back)).strftime( + "%Y-%m-%d" + ) + end_date = snapshot_date.strftime("%Y-%m-%d") + chunks = [ + store_app_ids[i : i + chunk_size] + for i in range(0, len(store_app_ids), chunk_size) + ] + results = [] + logger.info( + f"Get apps:{len(store_app_ids)} chunks:{len(chunks)} from app_country_metrics_history" + ) + for i, chunk_ids in enumerate(chunks): + id_list_str = ",".join(map(str, chunk_ids)) + sel_query = f""" + SELECT DISTINCT ON (acmh.store_app, acmh.country_id) + acmh.store_app, + acmh.snapshot_date, + acmh.country_id, + review_count, + rating, + rating_count, + one_star, + two_star, + three_star, + four_star, + five_star + FROM app_country_metrics_history acmh + WHERE acmh.store_app IN ({id_list_str}) + AND acmh.snapshot_date >= '{start_date}' + AND acmh.snapshot_date <= '{end_date}' + ORDER BY + store_app, + country_id, + snapshot_date DESC + """ + # Pull chunk and append to results list + chunk_df = pd.read_sql(sel_query, con=database_connection.engine) + results.append(chunk_df) + if (i + 1) % 5 == 0: + print(f"Finished chunk {i + 1}/{len(chunks)}...") + if not results: + return pd.DataFrame() + df = pd.concat(results, ignore_index=True) + df["crawled_date"] = pd.to_datetime(df["snapshot_date"]) + 
return df + + +# def get_latest_app_country_history( +# store: int, +# database_connection: PostgresCon, +# snapshot_date: datetime.date, +# days_back: int, +# ) -> pd.DataFrame: +# start_date = (snapshot_date - datetime.timedelta(days=days_back)).strftime( +# "%Y-%m-%d" +# ) +# end_date = snapshot_date.strftime("%Y-%m-%d") +# sel_query = f"""SELECT +# DISTINCT ON +# ( +# store_app, +# country_id +# ) +# acmh.store_app, +# acmh.snapshot_date, +# acmh.country_id, +# review_count, +# rating, +# rating_count, +# one_star, +# two_star, +# three_star, +# four_star, +# five_star +# FROM +# app_country_metrics_history acmh +# LEFT JOIN store_apps sa ON +# acmh.store_app = sa.id +# WHERE +# snapshot_date >= '{start_date}' +# AND snapshot_date <= '{end_date}' +# AND sa.store = {store} +# ORDER BY +# store_app, +# country_id, +# snapshot_date DESC +# """ +# df = pd.read_sql(sel_query, con=database_connection.engine) +# return df + + +def get_retention_benchmarks(database_connection: PostgresCon) -> pd.DataFrame: + sel_query = """WITH + retention_benchmarks AS ( + SELECT + mac.store, + mac.category AS app_category, + -- Retention D1 Logic + COALESCE(rgb.d1, + CASE + WHEN mac.category LIKE 'game%%' THEN (SELECT d1 FROM retention_global_benchmarks WHERE app_category = 'games' LIMIT 1) + ELSE (SELECT d1 FROM retention_global_benchmarks WHERE app_category = 'apps' LIMIT 1) + END + ) AS d1, + -- Retention D7 Logic + COALESCE(rgb.d7, + CASE + WHEN mac.category LIKE 'game%%' THEN (SELECT d7 FROM retention_global_benchmarks WHERE app_category = 'games' LIMIT 1) + ELSE (SELECT d7 FROM retention_global_benchmarks WHERE app_category = 'apps' LIMIT 1) + END + ) AS d7, + -- Retention D30 Logic + COALESCE(rgb.d30, + CASE + WHEN mac.category LIKE 'game%%' THEN (SELECT d30 FROM retention_global_benchmarks WHERE app_category = 'games' LIMIT 1) + ELSE (SELECT d30 FROM retention_global_benchmarks WHERE app_category = 'apps' LIMIT 1) + END + ) AS d30 + FROM + mv_app_categories mac + LEFT JOIN 
retention_global_benchmarks rgb + ON + mac.category = rgb.app_category + AND mac.store = rgb.store + ) + SELECT + store, + app_category, + d1, + d7, + d30 + FROM + retention_benchmarks + ; + """ + df = pd.read_sql(sel_query, con=database_connection.engine) + return df diff --git a/adscrawler/dbcon/sql/query_apps_to_api_scan.sql b/adscrawler/dbcon/sql/query_apps_to_api_scan.sql index 18df11e7..6db6dbec 100644 --- a/adscrawler/dbcon/sql/query_apps_to_api_scan.sql +++ b/adscrawler/dbcon/sql/query_apps_to_api_scan.sql @@ -138,7 +138,7 @@ user_requested_apps_crawl AS ( lsvc.last_downloaded_at, urs.created_at AS user_requested_at FROM - user_requested_scan AS urs + agadmin.user_requested_scan AS urs LEFT JOIN store_apps AS sa ON urs.store_id = sa.store_id diff --git a/adscrawler/dbcon/sql/query_apps_to_download.sql b/adscrawler/dbcon/sql/query_apps_to_download.sql index a1226108..7d381044 100644 --- a/adscrawler/dbcon/sql/query_apps_to_download.sql +++ b/adscrawler/dbcon/sql/query_apps_to_download.sql @@ -20,6 +20,7 @@ latest_success_version_codes AS ( store_app, version_code, created_at, + updated_at, crawl_result FROM version_codes @@ -58,8 +59,10 @@ growth_apps AS ( SELECT sa.id AS store_app, saz.store_id - FROM frontend.store_apps_z_scores AS saz - LEFT JOIN store_apps AS sa ON saz.store_id = sa.store_id + FROM + frontend.z_scores_top_apps AS saz + LEFT JOIN store_apps AS sa ON + saz.store_id = sa.store_id ), scheduled_apps_crawl AS ( SELECT @@ -87,27 +90,44 @@ scheduled_apps_crawl AS ( LEFT JOIN faily_downloads_quarter AS fdq ON vc.store_app = fdq.store_app + LEFT JOIN store_apps AS sa + ON + dc.store_app = sa.id WHERE dc.store = :store - AND - ( + AND ( vc.updated_at IS NULL OR ( ( - vc.crawl_result = 1 - AND ( - vc.updated_at < current_date - interval '180 days' - OR vc.updated_at < '2025-05-01' + -- never downloaded + lsvc.created_at IS NULL + -- success not downloaded > x days and store recently updated + OR ( + lsvc.updated_at < current_date - interval '120 
days' + AND ( + sa.store_last_updated + > current_date - interval '90 days' + OR sa.store_last_updated IS NULL + ) ) ) OR + -- Retry failing every couple days, including same x days from above ( - (lsvc.created_at IS NULL OR lsvc.created_at < '2025-05-01') - AND vc.crawl_result IN ( - 2, 3, 4 + lsvc.created_at IS NULL + OR lsvc.created_at < current_date - interval '120 days' + AND ( + sa.store_last_updated + > current_date - interval '90 days' + OR sa.store_last_updated IS NULL + ) + AND ( + vc.crawl_result IN ( + 2, 3, 4 + ) + AND vc.updated_at < current_date - interval '2 days' ) - AND vc.updated_at < current_date - interval '2 days' ) ) ) @@ -127,12 +147,13 @@ user_requested_apps_crawl AS ( coalesce(fd.attempt_count, 0) AS failed_attempts_month, coalesce(fdq.attempt_count, 0) AS failed_attempts_quarter FROM - user_requested_scan AS urs + agadmin.user_requested_scan AS urs LEFT JOIN store_apps AS sa ON urs.store_id = sa.store_id LEFT JOIN app_global_metrics_latest AS agm - ON sa.id = agm.store_app + ON + sa.id = agm.store_app LEFT JOIN latest_success_version_codes AS lsvc ON sa.id = lsvc.store_app @@ -200,7 +221,11 @@ combined AS ( last_crawl_result, CASE WHEN - store_app IN (SELECT ga.store_app FROM growth_apps AS ga) + store_app IN ( + SELECT ga.store_app + FROM + growth_apps AS ga + ) THEN 'top_scheduled' ELSE 'scheduled' END AS mysource, @@ -209,10 +234,11 @@ combined AS ( FROM scheduled_apps_crawl WHERE - failed_attempts_month < 2 AND failed_attempts_quarter < 4 + failed_attempts_month < 2 + AND failed_attempts_quarter < 4 AND ( last_downloaded_at IS NULL - OR last_downloaded_at < current_date - interval '180 days' + OR last_downloaded_at < current_date - interval '120 days' ) ), final_selection AS ( diff --git a/adscrawler/dbcon/sql/query_apps_to_process_keywords.sql b/adscrawler/dbcon/sql/query_apps_to_process_keywords.sql index 4256a754..4d343f82 100644 --- a/adscrawler/dbcon/sql/query_apps_to_process_keywords.sql +++ 
b/adscrawler/dbcon/sql/query_apps_to_process_keywords.sql @@ -1,6 +1,5 @@ WITH latest_descriptions AS ( - SELECT DISTINCT ON - (sad.store_app) + SELECT DISTINCT ON (sad.store_app) sad.id AS description_id, sad.store_app, sad.description_short, @@ -16,48 +15,54 @@ WITH latest_descriptions AS ( ), latest_extractions AS ( SELECT DISTINCT ON - (ak.store_app) - ak.store_app, - ak.extracted_at AS last_extracted_at + (adke.description_id) + adke.description_id, + adke.store_app, + adke.extracted_at AS app_keywords_extracted_at FROM - app_keywords_extracted AS ak + logging.app_description_keywords_extracted AS adke ORDER BY - ak.store_app ASC, - ak.extracted_at DESC + adke.description_id ASC, + adke.extracted_at DESC ), base AS ( SELECT ld.store_app, ld.description_id, - le.last_extracted_at, + le.app_keywords_extracted_at, ld.description_short, ld.description FROM latest_descriptions AS ld LEFT JOIN latest_extractions AS le ON - ld.store_app = le.store_app - WHERE le.last_extracted_at IS NULL OR ( - ld.description_last_updated > le.last_extracted_at - AND le.last_extracted_at <= NOW() - INTERVAL '7 days' - ) + ld.description_id = le.description_id + WHERE + le.app_keywords_extracted_at IS NULL + OR ld.description_last_updated > le.app_keywords_extracted_at + OR le.app_keywords_extracted_at <= NOW() - INTERVAL '31 days' ) SELECT b.store_app, b.description_id, - b.last_extracted_at, + b.app_keywords_extracted_at, b.description_short, b.description FROM base AS b INNER JOIN app_global_metrics_latest AS agml ON b.store_app = agml.store_app ORDER BY - (CASE WHEN b.last_extracted_at IS NULL THEN 1 ELSE 0 END) DESC, -- always crawl new ones first + (CASE WHEN b.app_keywords_extracted_at IS NULL THEN 1 ELSE 0 END) DESC, -- always crawl new ones first ( GREATEST( COALESCE(agml.installs, 0), COALESCE(agml.rating_count::BIGINT, 0) ) - * (10 * COALESCE(EXTRACT(DAY FROM (NOW() - b.last_extracted_at)), 1)) + * ( + 10 + * COALESCE( + EXTRACT(DAY FROM (NOW() - 
b.app_keywords_extracted_at)), 1 + ) + ) ) DESC LIMIT :mylimit; diff --git a/adscrawler/dbcon/sql/query_apps_to_process_metrics.sql b/adscrawler/dbcon/sql/query_apps_to_process_metrics.sql new file mode 100644 index 00000000..c6797496 --- /dev/null +++ b/adscrawler/dbcon/sql/query_apps_to_process_metrics.sql @@ -0,0 +1,42 @@ +WITH candidate_apps AS ( + SELECT + sa.store, + sa.id AS store_app, + sa.category AS app_category + FROM + frontend.store_apps_overview AS sa + WHERE + ( + sa.crawl_result = 1 + OR sa.store_last_updated >= current_date - INTERVAL '365 days' + ) + ORDER BY sa.id -- helps predictable batching + LIMIT :batch_size * 5 +) +SELECT + agmh.snapshot_date, + ca.store, + agmh.store_app, + ca.app_category, + agmh.installs, + agmh.rating_count, + agmh.review_count, + agmh.rating, + agmh.one_star, + agmh.two_star, + agmh.three_star, + agmh.four_star, + agmh.five_star +FROM candidate_apps AS ca +INNER JOIN app_global_metrics_history AS agmh + ON ca.store_app = agmh.store_app +WHERE + agmh.snapshot_date >= current_date - INTERVAL '400 days' + AND NOT EXISTS ( + SELECT 1 + FROM logging.app_global_metrics_weekly AS lg + WHERE + lg.store_app = ca.store_app + AND lg.updated_at > current_date - INTERVAL '3 days' + ) +LIMIT :batch_size; diff --git a/adscrawler/dbcon/sql/query_apps_to_sdk_scan.sql b/adscrawler/dbcon/sql/query_apps_to_sdk_scan.sql index 8a6bbc5d..70120acd 100644 --- a/adscrawler/dbcon/sql/query_apps_to_sdk_scan.sql +++ b/adscrawler/dbcon/sql/query_apps_to_sdk_scan.sql @@ -126,7 +126,7 @@ user_requested_apps_crawl AS ( lsvc.scanned_at AS last_scuccess_scanned_at, lvc.id AS latest_version_code_db_id FROM - user_requested_scan AS urs + agadmin.user_requested_scan AS urs LEFT JOIN store_apps AS sa ON urs.store_id = sa.store_id diff --git a/adscrawler/dbcon/sql/query_apps_to_update_any_new.sql b/adscrawler/dbcon/sql/query_apps_to_update_any_new.sql new file mode 100644 index 00000000..97b68667 --- /dev/null +++ 
b/adscrawler/dbcon/sql/query_apps_to_update_any_new.sql @@ -0,0 +1,14 @@ +SELECT + sa.store, + sa.id AS store_app, + sa.store_id, + 'US' AS country_code, + sa.icon_url_100, + sa.updated_at AS app_updated_at +FROM + public.store_apps AS sa +WHERE + sa.store = :store + -- Always crawl new apps + AND sa.crawl_result IS NULL +LIMIT :mylimit; diff --git a/adscrawler/dbcon/sql/query_apps_to_update_primary.sql b/adscrawler/dbcon/sql/query_apps_to_update_primary.sql index ed513a44..614a2426 100644 --- a/adscrawler/dbcon/sql/query_apps_to_update_primary.sql +++ b/adscrawler/dbcon/sql/query_apps_to_update_primary.sql @@ -41,7 +41,7 @@ SELECT ctc.alpha2 AS country_code, ctc.priority, sa.icon_url_100, - sa.additional_html_scraped_at, + sa.additional_html_scraped_at AS html_last_scraped_at, sa.updated_at AS app_updated_at, lc.crawled_at AS country_crawled_at FROM diff --git a/adscrawler/dbcon/sql/query_apps_to_update_secondary.sql b/adscrawler/dbcon/sql/query_apps_to_update_secondary.sql index db7115d9..6e1fd61e 100644 --- a/adscrawler/dbcon/sql/query_apps_to_update_secondary.sql +++ b/adscrawler/dbcon/sql/query_apps_to_update_secondary.sql @@ -4,7 +4,6 @@ WITH target_apps AS ( sa.id AS store_app, sa.store_id, sa.icon_url_100, - sa.additional_html_scraped_at, sa.updated_at, sa.store_last_updated, agm.installs, @@ -102,7 +101,6 @@ SELECT ctc.alpha2 AS country_code, ctc.priority, sa.icon_url_100, - sa.additional_html_scraped_at, sa.updated_at AS app_updated_at, lc.crawled_at AS country_crawled_at FROM diff --git a/adscrawler/dbcon/sql/query_keywords_to_crawl.sql b/adscrawler/dbcon/sql/query_keywords_to_crawl.sql index ba6de3ad..74335f99 100644 --- a/adscrawler/dbcon/sql/query_keywords_to_crawl.sql +++ b/adscrawler/dbcon/sql/query_keywords_to_crawl.sql @@ -1,7 +1,7 @@ WITH rank_crawled_keywords AS ( - SELECT DISTINCT akr.keyword + SELECT DISTINCT akr.keyword_id FROM - app_keyword_rankings AS akr + frontend.app_keyword_ranks_daily AS akr WHERE akr.crawled_date > CURRENT_DATE - 
INTERVAL '7 days' ), @@ -11,28 +11,63 @@ log_crawled_keywords AS ( logging.keywords_crawled_at WHERE crawled_at > CURRENT_DATE - INTERVAL '7 days' +), +scheduled_keywords AS ( + SELECT + ks.keyword_id, + ks.keyword_text, + ks.app_count, + ks.total_apps + FROM + frontend.keyword_scores AS ks + WHERE + ks.keyword_id IN (SELECT kb.keyword_id FROM keywords_base AS kb) + AND + ( + ks.keyword_id NOT IN ( + SELECT rck.keyword_id + FROM + rank_crawled_keywords AS rck + ) + OR ks.keyword_id NOT IN ( + SELECT lck.keyword + FROM + log_crawled_keywords AS lck + ) + ) + ORDER BY + ks.competitiveness_score + DESC +), +distinct_sq AS ( + SELECT DISTINCT search_term + FROM + agadmin.search_queries ) SELECT - store, - keyword_id, - keyword_text, - app_count, - total_apps, - competitiveness_score + k.id AS keyword_id, + k.keyword_text, + 'user' AS priority, + 0 AS app_count, + 0 AS total_apps FROM - frontend.keyword_scores + distinct_sq AS sq +LEFT JOIN keywords AS k + ON + sq.search_term = k.keyword_text WHERE - keyword_id NOT IN ( - SELECT rck.keyword - FROM - rank_crawled_keywords AS rck - ) - OR keyword_id NOT IN ( + k.id NOT IN ( SELECT lck.keyword FROM log_crawled_keywords AS lck ) -ORDER BY - competitiveness_score - DESC +UNION ALL +SELECT + sk.keyword_id, + sk.keyword_text, + 'scheduled' AS priority, + sk.app_count, + sk.total_apps +FROM + scheduled_keywords AS sk LIMIT :mylimit; diff --git a/adscrawler/mitm_ad_parser/mitm_scrape_ads.py b/adscrawler/mitm_ad_parser/mitm_scrape_ads.py index cce468e8..76f7f033 100644 --- a/adscrawler/mitm_ad_parser/mitm_scrape_ads.py +++ b/adscrawler/mitm_ad_parser/mitm_scrape_ads.py @@ -61,6 +61,9 @@ def find_sent_video_df( def get_video_id(row: pd.Series) -> str: """Extracts video ID from URL based on the ad network domain.""" + if not row["tld_url"]: + # Likely IP address or other non-URL + return "" if "2mdn" in row["tld_url"]: if "/id/" in row["url"]: url_parts = urllib.parse.urlparse(row["url"]) @@ -106,7 +109,6 @@ def 
attribute_creatives( creatives["video_id"] = creatives.apply(lambda x: get_video_id(x), axis=1) creatives = creatives.drop_duplicates(subset=["video_id", "response_size_bytes"]) row_count = creatives.shape[0] - # For duplicate video_id sent_video_cache = {} parse_results_cache = {} i = 0 @@ -161,6 +163,7 @@ def attribute_creatives( found_ad_infos, found_error_messages = parse_sent_video_df( row, pub_store_id, sent_video_df, database_connection, video_id ) + sent_video_cache[video_id] = sent_video_df parse_results_cache[video_id] = (found_ad_infos, found_error_messages) for found_error_message in found_error_messages: @@ -220,6 +223,12 @@ def attribute_creatives( found_ad_network_tlds = list( set([item for sublist in found_ad_network_tlds for item in sublist]) ) + click_url_ids = [ + x["click_url_ids"] for x in found_ad_infos if x["click_url_ids"] is not None + ] + click_url_ids = list( + set([item for sublist in click_url_ids for item in sublist]) + ) try: md5_hash = store_creative_and_thumb_to_local( row, @@ -249,12 +258,18 @@ def attribute_creatives( row["error_msg"] = error_msg error_messages.append(row) continue - init_tlds = [x["init_tld"] for x in found_ad_infos] + init_tlds = list(set([x["init_tld"] for x in found_ad_infos])) if len(init_tlds) == 0: init_tld = None error_msg = "No initial domain found" row["error_msg"] = error_msg error_messages.append(row) + elif len(init_tlds) > 1: + error_msg = "Multiple initial domains found, perhaps log both?" 
+ logger.error(f"{error_msg} for {row['tld_url']} {video_id}") + row["error_msg"] = error_msg + error_messages.append(row) + continue else: init_tld = init_tlds[0] adv_creatives.append( @@ -264,6 +279,7 @@ def attribute_creatives( "md5_hash": md5_hash, "host_ad_network_tld": host_ad_network_tld, "creative_initial_domain_tld": init_tld, + "click_url_ids": click_url_ids, "adv_store_id": adv_store_id, "advertiser_store_app_id": adv_store_app_id, "mmp_urls": found_mmp_urls, @@ -431,6 +447,7 @@ def make_creative_records_df( "mmp_domain_id", "mmp_urls", "additional_ad_domain_ids", + "click_url_ids", ] ] # Nullable IDs, watch out for Int64 @@ -519,6 +536,7 @@ def parse_store_id_mitm_log( "advertiser_store_app_id", "advertiser_domain_id", "mmp_domain_id", + "click_url_ids", "additional_ad_domain_ids", "mmp_urls", "updated_at", diff --git a/adscrawler/mitm_ad_parser/models.py b/adscrawler/mitm_ad_parser/models.py index 51bafc0d..824ca398 100644 --- a/adscrawler/mitm_ad_parser/models.py +++ b/adscrawler/mitm_ad_parser/models.py @@ -1,7 +1,7 @@ import dataclasses from typing import Any, Self -import tldextract +from adscrawler.mitm_ad_parser.utils import get_tld @dataclasses.dataclass @@ -11,6 +11,7 @@ class AdInfo: init_tld: str | None = None found_ad_network_tlds: list[str] | None = None found_mmp_urls: list[str] | None = None + click_url_ids: list[int] | None = None def __getitem__(self: Self, key: str) -> Any: """Support dictionary-style access to dataclass fields.""" @@ -23,11 +24,7 @@ def __setitem__(self: Self, key: str, value: Any) -> None: @property def mmp_tld(self: Self) -> str | None: if self.found_mmp_urls and len(self.found_mmp_urls) > 0: - return ( - tldextract.extract(self.found_mmp_urls[0]).domain - + "." 
- + tldextract.extract(self.found_mmp_urls[0]).suffix - ) + return get_tld(self.found_mmp_urls[0]) return None diff --git a/adscrawler/mitm_ad_parser/network_parsers.py b/adscrawler/mitm_ad_parser/network_parsers.py index 28f2c0bd..a8fc4782 100644 --- a/adscrawler/mitm_ad_parser/network_parsers.py +++ b/adscrawler/mitm_ad_parser/network_parsers.py @@ -1,5 +1,6 @@ import ast import base64 +import hashlib import html import json import re @@ -23,6 +24,8 @@ query_all_domains, query_api_call_id_for_uuid, query_store_app_by_store_id_cached, + query_urls_by_hashes, + query_urls_hash_map_cached, upsert_df, ) from adscrawler.mitm_ad_parser.models import AdInfo, MultipleAdvertiserIdError @@ -137,6 +140,9 @@ def extract_and_decode_urls(text: str) -> list[str]: urls.append(decoded_url) # print("DECODED:", decoded_url) all_urls = list(set(vast_urls + urls)) + not_urls = [x for x in all_urls if "://" not in x[0:48]] + all_urls = [x for x in all_urls if "://" in x[0:48]] + logger.info(f"Found {len(all_urls)} urls, dropping {len(not_urls)} not urls") return all_urls @@ -164,7 +170,7 @@ def check_click_urls( click_urls = [] for url in all_urls: redirect_urls = [] - if "/click" in url or "/clk" in url: + if "/click" in url or "/clk" in url or "onelink.me" in url: if "tpbid.com" in url: url = url.replace("fybernativebrowser://navigate?url=", "") redirect_urls = follow_url_redirects( @@ -225,7 +231,21 @@ def check_and_upsert_new_domains( def upsert_urls(urls: list[str], database_connection: PostgresCon) -> pd.DataFrame: """Upserts the URLs into the database.""" - new_urls_df = pd.DataFrame({"url": urls}) + urls_df = pd.DataFrame({"url": urls}) + url_hash_map = query_urls_hash_map_cached(database_connection=database_connection) + urls_df["url_hash"] = urls_df["url"].apply( + lambda x: hashlib.md5(x.encode()).hexdigest() + ) + new_urls_df = urls_df[ + ~urls_df["url_hash"].isin(url_hash_map["url_hash"].to_list()) + ].copy() + if new_urls_df.empty: + urls_df = urls_df.merge( + 
url_hash_map[["url_hash", "url_id"]], + on="url_hash", + how="left", + ) + return urls_df http_urls_df = new_urls_df[new_urls_df["url"].str.startswith("http")].copy() nonhttp_urls_df = new_urls_df[~new_urls_df["url"].str.startswith("http")].copy() http_urls_df["tld_url"] = http_urls_df["url"].apply(lambda x: get_tld(x)) @@ -248,17 +268,26 @@ def upsert_urls(urls: list[str], database_connection: PostgresCon) -> pd.DataFra new_urls_df["domain_id"] = np.where( pd.isna(new_urls_df["domain_id"]), None, new_urls_df["domain_id"] ) - urls_df: pd.DataFrame = upsert_df( + new_urls_df["url_hash"] = new_urls_df["url"].apply( + lambda x: hashlib.md5(x.encode()).hexdigest() + ) + logger.info(f"Upserting {new_urls_df.shape[0]} new urls") + new_urls_df: pd.DataFrame = upsert_df( df=new_urls_df, database_connection=database_connection, schema="adtech", table_name="urls", - insert_columns=["url", "domain_id", "scheme"], - key_columns=["url"], - md5_key_columns=["url"], + insert_columns=["url", "url_hash", "domain_id", "scheme"], + key_columns=["url_hash"], return_rows=True, ) - urls_df = urls_df.rename(columns={"id": "url_id"}) + new_urls_df = new_urls_df.rename(columns={"id": "url_id"}) + url_hash_map = pd.concat([url_hash_map, new_urls_df[["url_hash", "url_id"]]]) + urls_df = urls_df.merge( + url_hash_map[["url_hash", "url_id"]], + on="url_hash", + how="left", + ) return urls_df @@ -299,27 +328,31 @@ def follow_url_redirects( """ Follows redirects and returns the final URL. - Cache the results to avoid repeated requests. + Cache the results in the database to avoid repeated requests. 
""" existing_chain_df = get_click_url_redirect_chains(run_id, database_connection) if not existing_chain_df.empty and url in existing_chain_df["url"].to_list(): - existing_chain_df = existing_chain_df[existing_chain_df["url"] == url] - redirect_chain = existing_chain_df["redirect_url"].to_list() + existing_chain_df = existing_chain_df[ + (existing_chain_df["url"] == url) + & (existing_chain_df["api_call_id"] == api_call_id) + ] + if not existing_chain_df.empty: + redirect_chain = existing_chain_df["redirect_url"].to_list() + return redirect_chain + # New chain, insert after getting the chain + redirect_chain_dict = get_redirect_chain(url) + if len(redirect_chain_dict) > 0: + logger.info(f"Found new click redirects: {len(redirect_chain_dict)}") + chain_df = pd.DataFrame(redirect_chain_dict) + chain_df["run_id"] = run_id + chain_df["api_call_id"] = api_call_id + chain_df["url"] = url + upsert_click_url_redirect_chains(chain_df, database_connection) + redirect_chain = list( + set(chain_df["next_url"].to_list() + chain_df["url"].to_list()) + ) else: - # New chain, insert after getting the chain - redirect_chain_dict = get_redirect_chain(url) - if len(redirect_chain_dict) > 0: - logger.info(f"Found new click redirects: {len(redirect_chain_dict)}") - chain_df = pd.DataFrame(redirect_chain_dict) - chain_df["run_id"] = run_id - chain_df["api_call_id"] = api_call_id - chain_df["url"] = url - upsert_click_url_redirect_chains(chain_df, database_connection) - redirect_chain = list( - set(chain_df["next_url"].to_list() + chain_df["url"].to_list()) - ) - else: - redirect_chain = [] + redirect_chain = [] return redirect_chain @@ -377,10 +410,11 @@ def parse_urls_for_known_parts( if "websdk.appsflyer.com" in url: continue if "appsflyer.com" in tld_url: - adv_store_id = re.search( + matches = re.search( r"http.*\.appsflyer\.com/([a-zA-Z0-9_.]+)[\?\-]", url - )[1] - if adv_store_id: + ) + if matches and matches.group(1): + adv_store_id = matches.group(1) 
found_adv_store_ids.append(adv_store_id) elif match := re.search(r"intent://details\?id=([a-zA-Z0-9._]+)", url): adv_store_id = match.group(1) @@ -744,6 +778,13 @@ def parse_text_for_adinfo( except MultipleAdvertiserIdError as e: error_msg = f"multiple adv_store_id found for: {e.found_adv_store_ids}" logger.error(error_msg) + if click_urls and len(click_urls) > 0: + click_url_hashes = [hashlib.md5(url.encode()).hexdigest() for url in click_urls] + cdf = query_urls_by_hashes(click_url_hashes, database_connection) + if not cdf.empty: + ad_info.click_url_ids = cdf["id"].tolist() + else: + logger.error("Click URLs found but no URL IDs in DB") return ad_info, error_msg diff --git a/adscrawler/packages/apks/waydroid.py b/adscrawler/packages/apks/waydroid.py index 5450fc48..0f7afa75 100644 --- a/adscrawler/packages/apks/waydroid.py +++ b/adscrawler/packages/apks/waydroid.py @@ -451,6 +451,7 @@ def remove_all_third_party_apps() -> None: timeout=30, ) logger.info(f"{function_info} success") + os.system(f"sudo bash -c 'rm -rf {WAYDROID_MEDIA_DIR}/*'") THIRD_PARTY_APPS_TO_KEEP = ["org.mozilla.firefox", "io.github.huskydg.magisk"] diff --git a/adscrawler/packages/ipas/get_plist.py b/adscrawler/packages/ipas/get_plist.py index a26bf940..4da87240 100644 --- a/adscrawler/packages/ipas/get_plist.py +++ b/adscrawler/packages/ipas/get_plist.py @@ -88,6 +88,23 @@ def get_parsed_plist( plist_bytes = f.read() data = plistlib.loads(plist_bytes) plist_str = str(data) + # Interesting data in here, but also many keys + # jdata = data['CFBundleConfigDataJSONBase64'] + # jdata = data['CFBundlePagesJSONBase64'] + # if jdata: + # jdata = base64.b64decode(jdata).decode('utf-8') + # jdata = json.loads(jdata) + # jdata = pd.json_normalize(jdata) + # jdata = jdata[["path", "value"]] + # drop for now + try: + data.pop("CFBundleConfigDataJSONBase64") + except KeyError: + pass + try: + data.pop("CFBundlePagesJSONBase64") + except KeyError: + pass df = ( pd.json_normalize(data, sep="/") .T.explode(0) 
diff --git a/adscrawler/packages/process_files.py b/adscrawler/packages/process_files.py index 623888b4..5106c11c 100644 --- a/adscrawler/packages/process_files.py +++ b/adscrawler/packages/process_files.py @@ -9,7 +9,7 @@ insert_version_code, query_apps_to_download, query_apps_to_sdk_scan, - upsert_details_df, + upser_sdk_details_df, ) from adscrawler.packages.apks.download_apk import ( manage_apk_download, @@ -237,7 +237,7 @@ def process_sdks( index=False, ) if crawl_result == 1: - upsert_details_df( + upser_sdk_details_df( details_df=details_df, database_connection=database_connection, store_id=store_id, diff --git a/adscrawler/packages/storage.py b/adscrawler/packages/storage.py index e9633e3b..bf4c682f 100644 --- a/adscrawler/packages/storage.py +++ b/adscrawler/packages/storage.py @@ -467,7 +467,7 @@ def download_app_by_store_id( df = df[~(df["version_code"] == "failed")] if df.empty: logger.error(f"S3 only has failed apk for {store_id=}, no version_code") - if version_str: + if version_str and version_str != '-1': df = df[df["version_code"] == version_str] final_version_str = version_str else: diff --git a/adscrawler/packages/utils.py b/adscrawler/packages/utils.py index 202d6f5b..4d1e56ec 100644 --- a/adscrawler/packages/utils.py +++ b/adscrawler/packages/utils.py @@ -87,7 +87,7 @@ def unzip_apk(store_id: str, file_path: pathlib.Path) -> pathlib.Path: ) if "filename not matched" in output.stderr: unzip_command = f"unzip -o {file_path.as_posix()} base.apk -d {partial_apk_dir.as_posix()}" - output = subprocess.run( + _output = subprocess.run( unzip_command, shell=True, check=True, capture_output=True, timeout=60 ) base_apk_path = pathlib.Path(partial_apk_dir, "base.apk") @@ -181,30 +181,24 @@ def get_local_file_path(store: int, store_id: str) -> pathlib.Path | None: Returns: The file extension ('.apk' or '.xapk') if found, None otherwise """ - # Define all possible paths to check # In the future we would check version codes as well - apk_paths = [ 
pathlib.Path(APKS_DIR, f"{store_id}.apk"), pathlib.Path(XAPKS_DIR, f"{store_id}.xapk"), pathlib.Path(APKS_INCOMING_DIR, f"{store_id}.apk"), pathlib.Path(XAPKS_INCOMING_DIR, f"{store_id}.xapk"), ] - ipa_paths = [ pathlib.Path(IPAS_DIR, f"{store_id}.ipa"), pathlib.Path(IPAS_INCOMING_DIR, f"{store_id}.ipa"), ] - paths_to_check = apk_paths if store == 1 else ipa_paths - - logger.info(f"Checking {store_id=} if exists locally") - for path in paths_to_check: if path.exists(): + logger.info(f"{store_id=} {path=} exists locally") return path - + logger.info(f"{store_id=} no local file found") return None diff --git a/adscrawler/tools/example_contab.txt b/adscrawler/tools/example_contab.txt index b61cce75..31e8a074 100644 --- a/adscrawler/tools/example_contab.txt +++ b/adscrawler/tools/example_contab.txt @@ -45,7 +45,6 @@ # Zscores 16 2 * * * psql -d madrone -c "REFRESH MATERIALIZED VIEW store_app_z_scores" >> /var/log/postgresql/refresh_mv.log 2>&1 -24 2 * * * psql -d madrone -c "REFRESH MATERIALIZED VIEW CONCURRENTLY frontend.store_apps_z_scores" >> /var/log/postgresql/refresh_mv.log 2>&1 01 5 * * * psql -d madrone -c "REFRESH MATERIALIZED VIEW CONCURRENTLY frontend.adstxt_ad_domain_overview" >> /var/log/postgresql/refresh_mv.log 2>&1 31 5 * * * psql -d madrone -c "REFRESH MATERIALIZED VIEW CONCURRENTLY frontend.adstxt_publishers_overview" >> /var/log/postgresql/refresh_mv.log 2>&1 diff --git a/new_company.yml b/new_company.yml.example similarity index 54% rename from new_company.yml rename to new_company.yml.example index dcf39391..b614602c 100644 --- a/new_company.yml +++ b/new_company.yml.example @@ -1,8 +1,8 @@ -# Note this file is in gitignore -# If you have permanent changes please commit manually +# This is an example of a new company to add to the database +# Remove .example from the file name and fill in the details. 
domain: example.com -company_name: Example Company -sdk_name: Example SDK +company_name: Example +sdk_name: Example sdk_slug: example-sdk is_open_source: false has_third_party_tracking: true @@ -11,7 +11,6 @@ has_third_party_tracking: true # 3: Analytics: Product # 4: Development Tools # 5: Business Tools -category_id: 1 +category_id: 5 sdk_package_patterns: - - ExampleA - - com.example + - io.example \ No newline at end of file diff --git a/pg-ddl/schema/adtech/api_call_urls.sql b/pg-ddl/schema/adtech/api_call_urls.sql index c1a3a3a9..37cebd8f 100644 --- a/pg-ddl/schema/adtech/api_call_urls.sql +++ b/pg-ddl/schema/adtech/api_call_urls.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict uDFPP2pGlK6g2BYPkexYEKvBiWTQxAMYd9wM60ogd2lzh813f5pU0AWPqGKdLg9 +\restrict 1tPBeXEf4ffIdDfdDufKAkfttGwg2U2OsZwJ5dtTHpfkVqSkS1CfmuqsiZgPL9C --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -118,5 +118,5 @@ ALTER TABLE ONLY adtech.api_call_urls -- PostgreSQL database dump complete -- -\unrestrict uDFPP2pGlK6g2BYPkexYEKvBiWTQxAMYd9wM60ogd2lzh813f5pU0AWPqGKdLg9 +\unrestrict 1tPBeXEf4ffIdDfdDufKAkfttGwg2U2OsZwJ5dtTHpfkVqSkS1CfmuqsiZgPL9C diff --git a/pg-ddl/schema/adtech/categories.sql b/pg-ddl/schema/adtech/categories.sql index fedbd78d..df468598 100644 --- a/pg-ddl/schema/adtech/categories.sql +++ b/pg-ddl/schema/adtech/categories.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict b4iLQRR3nKhpUKMKN3S9qg9izzTZVpkU9AkcXh3vLdnCnrIbkKAb8rvAgBOcp6m +\restrict nLxBi8Vbrn9RueuyAWsnxTQqrcZ6ZYhdhoeI38zvwpSwM7UoRHzMoAQJfEV8LDk --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 
18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -62,5 +62,5 @@ ALTER TABLE ONLY adtech.categories -- PostgreSQL database dump complete -- -\unrestrict b4iLQRR3nKhpUKMKN3S9qg9izzTZVpkU9AkcXh3vLdnCnrIbkKAb8rvAgBOcp6m +\unrestrict nLxBi8Vbrn9RueuyAWsnxTQqrcZ6ZYhdhoeI38zvwpSwM7UoRHzMoAQJfEV8LDk diff --git a/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h1__matview.sql b/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h1__matview.sql new file mode 100644 index 00000000..88468c20 --- /dev/null +++ b/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h1__matview.sql @@ -0,0 +1,128 @@ +-- +-- PostgreSQL database dump +-- + +\restrict UHBIiWOeqr2j04WCvHMbNb5u3NSgZjcwlNm5f53u24HAey40dt803uEyLJGc6zA + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: combined_store_apps_companies_2025_h1; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h1 AS + WITH api_based_companies AS ( + SELECT DISTINCT saac.store_app, + cm.mapped_category AS app_category, + cdm.company_id, + c_1.parent_company_id AS parent_id, + 'api_call'::text AS tag_source, + COALESCE(cad_1.domain_name, (saac.tld_url)::character varying) AS ad_domain + FROM ((((((public.api_calls saac + LEFT JOIN public.store_apps sa_1 ON ((saac.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm 
ON (((sa_1.category)::text = (cm.original_category)::text))) + LEFT JOIN public.domains ad_1 ON ((saac.tld_url = (ad_1.domain_name)::text))) + LEFT JOIN adtech.company_domain_mapping cdm ON ((ad_1.id = cdm.domain_id))) + LEFT JOIN adtech.companies c_1 ON ((cdm.company_id = c_1.id))) + LEFT JOIN public.domains cad_1 ON ((c_1.domain_id = cad_1.id))) + WHERE ((saac.called_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (saac.called_at < '2025-07-01 00:00:00'::timestamp without time zone)) + ), developer_based_companies AS ( + SELECT DISTINCT sa_1.id AS store_app, + cm.mapped_category AS app_category, + cd.company_id, + d.domain_name AS ad_domain, + 'developer'::text AS tag_source, + COALESCE(c_1.parent_company_id, cd.company_id) AS parent_id + FROM ((((adtech.company_developers cd + LEFT JOIN public.store_apps sa_1 ON ((cd.developer_id = sa_1.developer))) + LEFT JOIN adtech.companies c_1 ON ((cd.company_id = c_1.id))) + LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), sdk_based_companies AS ( + SELECT DISTINCT sasd.store_app, + cm.mapped_category AS app_category, + sac.company_id, + ad_1.domain_name AS ad_domain, + 'sdk'::text AS tag_source, + COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id + FROM (((((adtech.store_app_sdk_strings_2025_h1 sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) + LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) + LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), distinct_ad_and_pub_domains AS ( + SELECT DISTINCT pd.domain_name AS publisher_domain_url, + ad_1.domain_name AS ad_domain_url, + aae.relationship + FROM ((((public.app_ads_entrys aae + LEFT JOIN public.domains ad_1 ON ((aae.ad_domain = 
ad_1.id))) + LEFT JOIN public.app_ads_map aam ON ((aae.id = aam.app_ads_entry))) + LEFT JOIN public.domains pd ON ((aam.pub_domain = pd.id))) + LEFT JOIN public.adstxt_crawl_results pdcr ON ((pd.id = pdcr.domain_id))) + WHERE ((pdcr.crawled_at - aam.updated_at) < '01:00:00'::interval) + ), combined_sources AS ( + SELECT api_based_companies.store_app, + api_based_companies.app_category, + api_based_companies.company_id, + api_based_companies.parent_id, + api_based_companies.ad_domain, + api_based_companies.tag_source + FROM api_based_companies + UNION ALL + SELECT sdk_based_companies.store_app, + sdk_based_companies.app_category, + sdk_based_companies.company_id, + sdk_based_companies.parent_id, + sdk_based_companies.ad_domain, + sdk_based_companies.tag_source + FROM sdk_based_companies + ) + SELECT cs.ad_domain, + cs.store_app, + sa.category AS app_category, + c.id AS company_id, + COALESCE(c.parent_company_id, c.id) AS parent_id, + CASE + WHEN (sa.sdk_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'sdk'::text)) + ELSE NULL::boolean + END AS sdk, + CASE + WHEN (sa.api_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'api_call'::text)) + ELSE NULL::boolean + END AS api_call, + bool_or((cs.tag_source = 'app_ads_direct'::text)) AS app_ads_direct, + bool_or((cs.tag_source = 'app_ads_reseller'::text)) AS app_ads_reseller + FROM (((combined_sources cs + LEFT JOIN frontend.store_apps_overview sa ON ((cs.store_app = sa.id))) + LEFT JOIN public.domains ad ON (((cs.ad_domain)::text = (ad.domain_name)::text))) + LEFT JOIN adtech.companies c ON ((ad.id = c.domain_id))) + GROUP BY cs.ad_domain, cs.store_app, sa.category, c.id, c.parent_company_id, sa.sdk_successful_last_crawled, sa.api_successful_last_crawled + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h1 OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict UHBIiWOeqr2j04WCvHMbNb5u3NSgZjcwlNm5f53u24HAey40dt803uEyLJGc6zA 
+ diff --git a/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h2__matview.sql b/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h2__matview.sql new file mode 100644 index 00000000..bb2852a0 --- /dev/null +++ b/pg-ddl/schema/adtech/combined_store_apps_companies_2025_h2__matview.sql @@ -0,0 +1,128 @@ +-- +-- PostgreSQL database dump +-- + +\restrict dyxRVqQQ9CsqbLhVM3QXs1QpLhJHXpNtjG22yCPga7UeVhqaIwEONX9sqOIhFUq + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: combined_store_apps_companies_2025_h2; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h2 AS + WITH api_based_companies AS ( + SELECT DISTINCT saac.store_app, + cm.mapped_category AS app_category, + cdm.company_id, + c_1.parent_company_id AS parent_id, + 'api_call'::text AS tag_source, + COALESCE(cad_1.domain_name, (saac.tld_url)::character varying) AS ad_domain + FROM ((((((public.api_calls saac + LEFT JOIN public.store_apps sa_1 ON ((saac.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + LEFT JOIN public.domains ad_1 ON ((saac.tld_url = (ad_1.domain_name)::text))) + LEFT JOIN adtech.company_domain_mapping cdm ON ((ad_1.id = cdm.domain_id))) + LEFT JOIN adtech.companies c_1 ON ((cdm.company_id = c_1.id))) + LEFT JOIN public.domains cad_1 ON ((c_1.domain_id = cad_1.id))) + WHERE ((saac.called_at >= 
'2025-07-01 00:00:00'::timestamp without time zone) AND (saac.called_at < '2026-01-01 00:00:00'::timestamp without time zone)) + ), developer_based_companies AS ( + SELECT DISTINCT sa_1.id AS store_app, + cm.mapped_category AS app_category, + cd.company_id, + d.domain_name AS ad_domain, + 'developer'::text AS tag_source, + COALESCE(c_1.parent_company_id, cd.company_id) AS parent_id + FROM ((((adtech.company_developers cd + LEFT JOIN public.store_apps sa_1 ON ((cd.developer_id = sa_1.developer))) + LEFT JOIN adtech.companies c_1 ON ((cd.company_id = c_1.id))) + LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), sdk_based_companies AS ( + SELECT DISTINCT sasd.store_app, + cm.mapped_category AS app_category, + sac.company_id, + ad_1.domain_name AS ad_domain, + 'sdk'::text AS tag_source, + COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id + FROM (((((adtech.store_app_sdk_strings_2025_h2 sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) + LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) + LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), distinct_ad_and_pub_domains AS ( + SELECT DISTINCT pd.domain_name AS publisher_domain_url, + ad_1.domain_name AS ad_domain_url, + aae.relationship + FROM ((((public.app_ads_entrys aae + LEFT JOIN public.domains ad_1 ON ((aae.ad_domain = ad_1.id))) + LEFT JOIN public.app_ads_map aam ON ((aae.id = aam.app_ads_entry))) + LEFT JOIN public.domains pd ON ((aam.pub_domain = pd.id))) + LEFT JOIN public.adstxt_crawl_results pdcr ON ((pd.id = pdcr.domain_id))) + WHERE ((pdcr.crawled_at - aam.updated_at) < '01:00:00'::interval) + ), combined_sources AS ( + SELECT api_based_companies.store_app, +
api_based_companies.app_category, + api_based_companies.company_id, + api_based_companies.parent_id, + api_based_companies.ad_domain, + api_based_companies.tag_source + FROM api_based_companies + UNION ALL + SELECT sdk_based_companies.store_app, + sdk_based_companies.app_category, + sdk_based_companies.company_id, + sdk_based_companies.parent_id, + sdk_based_companies.ad_domain, + sdk_based_companies.tag_source + FROM sdk_based_companies + ) + SELECT cs.ad_domain, + cs.store_app, + sa.category AS app_category, + c.id AS company_id, + COALESCE(c.parent_company_id, c.id) AS parent_id, + CASE + WHEN (sa.sdk_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'sdk'::text)) + ELSE NULL::boolean + END AS sdk, + CASE + WHEN (sa.api_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'api_call'::text)) + ELSE NULL::boolean + END AS api_call, + bool_or((cs.tag_source = 'app_ads_direct'::text)) AS app_ads_direct, + bool_or((cs.tag_source = 'app_ads_reseller'::text)) AS app_ads_reseller + FROM (((combined_sources cs + LEFT JOIN frontend.store_apps_overview sa ON ((cs.store_app = sa.id))) + LEFT JOIN public.domains ad ON (((cs.ad_domain)::text = (ad.domain_name)::text))) + LEFT JOIN adtech.companies c ON ((ad.id = c.domain_id))) + GROUP BY cs.ad_domain, cs.store_app, sa.category, c.id, c.parent_company_id, sa.sdk_successful_last_crawled, sa.api_successful_last_crawled + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h2 OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict dyxRVqQQ9CsqbLhVM3QXs1QpLhJHXpNtjG22yCPga7UeVhqaIwEONX9sqOIhFUq + diff --git a/pg-ddl/schema/adtech/combined_store_apps_companies__matview.sql b/pg-ddl/schema/adtech/combined_store_apps_companies__matview.sql index b0ec0ef7..6ca10853 100644 --- a/pg-ddl/schema/adtech/combined_store_apps_companies__matview.sql +++ b/pg-ddl/schema/adtech/combined_store_apps_companies__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL 
database dump -- -\restrict F8pep8itUfBIYOcumKIEWaUxsCpSKT9PjUK4tAjJwdp967PvzjLay6R11LA3hXh +\restrict cTQB8uupTzy0hzbw353JUOSj5k5HKiG2cxIf0H49RmYqSNmIIwLA7fLyCsrQcib --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -55,16 +55,17 @@ CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies AS LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) ), sdk_based_companies AS ( - SELECT DISTINCT sac.store_app, + SELECT DISTINCT sasd.store_app, cm.mapped_category AS app_category, sac.company_id, ad_1.domain_name AS ad_domain, 'sdk'::text AS tag_source, COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id - FROM ((((adtech.store_app_sdk_strings sac + FROM (((((adtech.store_app_sdk_strings sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) - LEFT JOIN public.store_apps sa_1 ON ((sac.store_app = sa_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) ), distinct_ad_and_pub_domains AS ( SELECT DISTINCT pd.domain_name AS publisher_domain_url, @@ -164,5 +165,5 @@ CREATE UNIQUE INDEX combined_store_app_companies_idx ON adtech.combined_store_ap -- PostgreSQL database dump complete -- -\unrestrict F8pep8itUfBIYOcumKIEWaUxsCpSKT9PjUK4tAjJwdp967PvzjLay6R11LA3hXh +\unrestrict cTQB8uupTzy0hzbw353JUOSj5k5HKiG2cxIf0H49RmYqSNmIIwLA7fLyCsrQcib diff --git a/pg-ddl/schema/adtech/combined_store_apps_parent_companies__matview.sql 
b/pg-ddl/schema/adtech/combined_store_apps_parent_companies__matview.sql index 8fd02ad3..0332024b 100644 --- a/pg-ddl/schema/adtech/combined_store_apps_parent_companies__matview.sql +++ b/pg-ddl/schema/adtech/combined_store_apps_parent_companies__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 2pWfPSYGHwzSYpdkKPcukNJuTP7lOg4oVv65giH6MGHk1SCqzE4l4NorXuEGtbA +\restrict eA1lgEQPmLV8X6iVzw0xJf3Qo6XdM42KVvslsnMJ390fw0LTDRhnT3tzcWrQrEZ --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -59,5 +59,5 @@ CREATE UNIQUE INDEX idx_combined_store_apps_parent_companies_idx ON adtech.combi -- PostgreSQL database dump complete -- -\unrestrict 2pWfPSYGHwzSYpdkKPcukNJuTP7lOg4oVv65giH6MGHk1SCqzE4l4NorXuEGtbA +\unrestrict eA1lgEQPmLV8X6iVzw0xJf3Qo6XdM42KVvslsnMJ390fw0LTDRhnT3tzcWrQrEZ diff --git a/pg-ddl/schema/adtech/companies.sql b/pg-ddl/schema/adtech/companies.sql index 14487de2..698148ed 100644 --- a/pg-ddl/schema/adtech/companies.sql +++ b/pg-ddl/schema/adtech/companies.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Qf670jVSJ8EZ1VqE1PyX5IRRtZ2x07skAiUc80RuhST4NlW4avkJaJ8FeFNdczU +\restrict 7xmkOd90iwLy0XW6UGKgevOcgg9TnCV4axmxAXW12vtdHUUXDIFoRUgXtGJVzKN --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -98,5 +98,5 @@ ALTER TABLE ONLY adtech.companies -- PostgreSQL database dump complete -- -\unrestrict Qf670jVSJ8EZ1VqE1PyX5IRRtZ2x07skAiUc80RuhST4NlW4avkJaJ8FeFNdczU +\unrestrict 
7xmkOd90iwLy0XW6UGKgevOcgg9TnCV4axmxAXW12vtdHUUXDIFoRUgXtGJVzKN diff --git a/pg-ddl/schema/adtech/company_categories__matview.sql b/pg-ddl/schema/adtech/company_categories__matview.sql index 157f2ba2..b951547b 100644 --- a/pg-ddl/schema/adtech/company_categories__matview.sql +++ b/pg-ddl/schema/adtech/company_categories__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict VznbPr77kK1ZXcUyEOfFvotAKrRdtQQwXN97esdw4rzToFT0wtfAcoFGsOxhi6i +\restrict 9S3XVAmQQ1EPxk5ftfDi7mSfOnddfqocDFdaux3GN2P3yaFAOebUhu03RnP5bvs --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -42,5 +42,5 @@ ALTER MATERIALIZED VIEW adtech.company_categories OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict VznbPr77kK1ZXcUyEOfFvotAKrRdtQQwXN97esdw4rzToFT0wtfAcoFGsOxhi6i +\unrestrict 9S3XVAmQQ1EPxk5ftfDi7mSfOnddfqocDFdaux3GN2P3yaFAOebUhu03RnP5bvs diff --git a/pg-ddl/schema/adtech/company_developers.sql b/pg-ddl/schema/adtech/company_developers.sql index 2d8aaa93..6a463652 100644 --- a/pg-ddl/schema/adtech/company_developers.sql +++ b/pg-ddl/schema/adtech/company_developers.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict LL1e8kNzajKbIW4d5bp5pLMDARGMeTeSF8LqWTylB8m8XxQ7B6SsV7Zk60pEtj0 +\restrict QuEOenl189YW7RaZkxbzkZhLr4gc5jkjSehYwbV3yk5k3ZqJfcXAxBIrp6Lj6Yn --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -55,5 +55,5 @@ ALTER TABLE ONLY adtech.company_developers -- PostgreSQL database dump complete -- -\unrestrict 
LL1e8kNzajKbIW4d5bp5pLMDARGMeTeSF8LqWTylB8m8XxQ7B6SsV7Zk60pEtj0 +\unrestrict QuEOenl189YW7RaZkxbzkZhLr4gc5jkjSehYwbV3yk5k3ZqJfcXAxBIrp6Lj6Yn diff --git a/pg-ddl/schema/adtech/company_domain_mapping.sql b/pg-ddl/schema/adtech/company_domain_mapping.sql index 0bb3d551..5ae41d6e 100644 --- a/pg-ddl/schema/adtech/company_domain_mapping.sql +++ b/pg-ddl/schema/adtech/company_domain_mapping.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict eMu2gva4x2Ohnos0ZYV29GM7KleMpHMX8HaVJ1jywCUXRqNh5twgR8UgfvdI6HY +\restrict 46mKhqmhnUys8q9NRXzwTqsTG0UzK0BuyRQ7Tm30tz30dXXM4zSjLcoKm26o8VW --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -63,5 +63,5 @@ ALTER TABLE ONLY adtech.company_domain_mapping -- PostgreSQL database dump complete -- -\unrestrict eMu2gva4x2Ohnos0ZYV29GM7KleMpHMX8HaVJ1jywCUXRqNh5twgR8UgfvdI6HY +\unrestrict 46mKhqmhnUys8q9NRXzwTqsTG0UzK0BuyRQ7Tm30tz30dXXM4zSjLcoKm26o8VW diff --git a/pg-ddl/schema/adtech/company_mediation_adapters.sql b/pg-ddl/schema/adtech/company_mediation_adapters.sql new file mode 100644 index 00000000..0547ad32 --- /dev/null +++ b/pg-ddl/schema/adtech/company_mediation_adapters.sql @@ -0,0 +1,59 @@ +-- +-- PostgreSQL database dump +-- + +\restrict H5B7Ak5UsxQUn5WVFu0wg6xuASM7weaxNfI8Uon5YHnjs7leUagJYBIUYfH8X5f + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages 
= warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: company_mediation_adapters; Type: TABLE; Schema: adtech; Owner: postgres +-- + +CREATE TABLE adtech.company_mediation_adapters ( + company_id integer NOT NULL, + adapter_pattern character varying(100) NOT NULL +); + + +ALTER TABLE adtech.company_mediation_adapters OWNER TO postgres; + +-- +-- Name: company_mediation_adapters company_mediation_adapters_pkey; Type: CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.company_mediation_adapters + ADD CONSTRAINT company_mediation_adapters_pkey PRIMARY KEY (company_id, adapter_pattern); + + +-- +-- Name: company_mediation_adapters company_mediation_adapters_company_id_fkey; Type: FK CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.company_mediation_adapters + ADD CONSTRAINT company_mediation_adapters_company_id_fkey FOREIGN KEY (company_id) REFERENCES adtech.companies(id); + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict H5B7Ak5UsxQUn5WVFu0wg6xuASM7weaxNfI8Uon5YHnjs7leUagJYBIUYfH8X5f + diff --git a/pg-ddl/schema/adtech/company_sdk_strings__matview.sql b/pg-ddl/schema/adtech/company_sdk_strings__matview.sql deleted file mode 100644 index e02cc4cb..00000000 --- a/pg-ddl/schema/adtech/company_sdk_strings__matview.sql +++ /dev/null @@ -1,70 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict E09Dk3OoeJInNfSOA1f9kQUdUkC8qA9g1W4fNbUtwe59R35WEeLQ3TEyAzKFG3H - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; 
-SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: company_sdk_strings; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres --- - -CREATE MATERIALIZED VIEW adtech.company_sdk_strings AS - WITH matched_value_patterns AS ( - SELECT DISTINCT lower(vd.value_name) AS value_name_lower, - sd.company_id - FROM ((public.version_strings vd - JOIN adtech.sdk_packages sp ON ((lower(vd.value_name) ~~ (lower((sp.package_pattern)::text) || '%'::text)))) - JOIN adtech.sdks sd ON ((sp.sdk_id = sd.id))) - ), matched_path_patterns AS ( - SELECT DISTINCT lower(vd.xml_path) AS xml_path_lower, - sd.company_id - FROM ((public.version_strings vd - JOIN adtech.sdk_paths ptm ON ((lower(vd.xml_path) = lower((ptm.path_pattern)::text)))) - JOIN adtech.sdks sd ON ((ptm.sdk_id = sd.id))) - ) - SELECT vs.id AS version_string_id, - mp.company_id - FROM (matched_value_patterns mp - JOIN public.version_strings vs ON ((lower(vs.value_name) = mp.value_name_lower))) -UNION - SELECT vs.id AS version_string_id, - mp.company_id - FROM (matched_path_patterns mp - JOIN public.version_strings vs ON ((lower(vs.xml_path) = mp.xml_path_lower))) - WITH NO DATA; - - -ALTER MATERIALIZED VIEW adtech.company_sdk_strings OWNER TO postgres; - --- --- Name: company_sdk_strings_version_string_id_company_id_idx; Type: INDEX; Schema: adtech; Owner: postgres --- - -CREATE UNIQUE INDEX company_sdk_strings_version_string_id_company_id_idx ON adtech.company_sdk_strings USING btree (version_string_id, company_id); - - --- --- PostgreSQL database dump complete --- - -\unrestrict E09Dk3OoeJInNfSOA1f9kQUdUkC8qA9g1W4fNbUtwe59R35WEeLQ3TEyAzKFG3H - diff --git a/pg-ddl/schema/adtech/company_share_change_2025__matview.sql b/pg-ddl/schema/adtech/company_share_change_2025__matview.sql new file mode 100644 index 00000000..83bca648 --- /dev/null +++ b/pg-ddl/schema/adtech/company_share_change_2025__matview.sql @@ -0,0 +1,114 @@ +-- +-- PostgreSQL database dump +-- + 
+\restrict X0fRl2xTbouXhIX9foQlqLBCB9DheGhUhHD7dZF4NH5XMC8UyaAH563Hc53EPnn + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: company_share_change_2025; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.company_share_change_2025 AS + WITH limit_apps AS ( + SELECT DISTINCT store_app_ranks_weekly.store_app + FROM frontend.store_app_ranks_weekly + WHERE ((store_app_ranks_weekly.crawled_date >= '2025-01-01'::date) AND (store_app_ranks_weekly.crawled_date < '2026-01-01'::date)) + ), totals AS ( + SELECT 'h1'::text AS half, + count(DISTINCT combined_store_apps_companies_2025_h1.store_app) AS total_apps + FROM adtech.combined_store_apps_companies_2025_h1 + WHERE (combined_store_apps_companies_2025_h1.sdk AND (combined_store_apps_companies_2025_h1.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + UNION ALL + SELECT 'h2'::text AS half, + count(DISTINCT combined_store_apps_companies_2025_h2.store_app) AS total_apps + FROM adtech.combined_store_apps_companies_2025_h2 + WHERE (combined_store_apps_companies_2025_h2.sdk AND (combined_store_apps_companies_2025_h2.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + ), domain_counts AS ( + SELECT 'h1'::text AS half, + combined_store_apps_companies_2025_h1.ad_domain, + count(DISTINCT combined_store_apps_companies_2025_h1.store_app) AS app_count + FROM adtech.combined_store_apps_companies_2025_h1 + WHERE 
(combined_store_apps_companies_2025_h1.sdk AND (combined_store_apps_companies_2025_h1.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + GROUP BY combined_store_apps_companies_2025_h1.ad_domain + UNION ALL + SELECT 'h2'::text AS half, + combined_store_apps_companies_2025_h2.ad_domain, + count(DISTINCT combined_store_apps_companies_2025_h2.store_app) AS app_count + FROM adtech.combined_store_apps_companies_2025_h2 + WHERE (combined_store_apps_companies_2025_h2.sdk AND (combined_store_apps_companies_2025_h2.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + GROUP BY combined_store_apps_companies_2025_h2.ad_domain + ), shares AS ( + SELECT d.half, + d.ad_domain, + d.app_count, + t.total_apps, + ((d.app_count)::numeric / (NULLIF(t.total_apps, 0))::numeric) AS pct_share + FROM (domain_counts d + JOIN totals t ON ((t.half = d.half))) + ), shares_h1 AS ( + SELECT shares.half, + shares.ad_domain, + shares.app_count, + shares.total_apps, + shares.pct_share + FROM shares + WHERE (shares.half = 'h1'::text) + ), shares_h2 AS ( + SELECT shares.half, + shares.ad_domain, + shares.app_count, + shares.total_apps, + shares.pct_share + FROM shares + WHERE (shares.half = 'h2'::text) + ) + SELECT COALESCE(s2.ad_domain, s1.ad_domain) AS ad_domain, + s1.app_count AS apps_h1, + s1.total_apps AS total_apps_h1, + round((COALESCE(s1.pct_share, (0)::numeric) * (100)::numeric), 4) AS share_h1_pct, + s2.app_count AS apps_h2, + s2.total_apps AS total_apps_h2, + round((COALESCE(s2.pct_share, (0)::numeric) * (100)::numeric), 4) AS share_h2_pct, + (COALESCE(s2.app_count, (0)::bigint) - COALESCE(s1.app_count, (0)::bigint)) AS net_app_change, + CASE + WHEN ((s1.app_count IS NULL) OR (s1.app_count = 0)) THEN 100.00 + ELSE round(((((COALESCE(s2.app_count, (0)::bigint) - s1.app_count))::numeric / (s1.app_count)::numeric) * (100)::numeric), 2) + END AS app_growth_pct, + round(((COALESCE(s2.pct_share, (0)::numeric) - COALESCE(s1.pct_share, (0)::numeric)) * (100)::numeric), 6) 
AS share_change_pp + FROM (shares_h1 s1 + FULL JOIN shares_h2 s2 ON (((s1.ad_domain)::text = (s2.ad_domain)::text))) + ORDER BY (round(((COALESCE(s2.pct_share, (0)::numeric) - COALESCE(s1.pct_share, (0)::numeric)) * (100)::numeric), 6)) DESC NULLS LAST + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.company_share_change_2025 OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict X0fRl2xTbouXhIX9foQlqLBCB9DheGhUhHD7dZF4NH5XMC8UyaAH563Hc53EPnn + diff --git a/pg-ddl/schema/adtech/company_shares_2025_common__matview.sql b/pg-ddl/schema/adtech/company_shares_2025_common__matview.sql new file mode 100644 index 00000000..53a2b6f0 --- /dev/null +++ b/pg-ddl/schema/adtech/company_shares_2025_common__matview.sql @@ -0,0 +1,84 @@ +-- +-- PostgreSQL database dump +-- + +\restrict 36rDwaOswJ66Zmojq1UGmtAOHAeRfVVTthUAvhW73npNCvr965iXXBiMBRpcPjU + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: company_shares_2025_common; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.company_shares_2025_common AS + WITH common_apps AS ( + SELECT h1.store_app + FROM adtech.store_app_sdk_strings_2025_h1 h1 + INTERSECT + SELECT h2.store_app + FROM adtech.store_app_sdk_strings_2025_h2 h2 + ), h1_stats AS ( + SELECT sd.company_id, + count(DISTINCT store_app_sdk_strings_2025_h1.store_app) AS h1_app_count + FROM (adtech.store_app_sdk_strings_2025_h1 + JOIN adtech.sdks sd ON 
((store_app_sdk_strings_2025_h1.sdk_id = sd.id))) + WHERE (store_app_sdk_strings_2025_h1.store_app IN ( SELECT common_apps.store_app + FROM common_apps)) + GROUP BY sd.company_id + ), h2_stats AS ( + SELECT sd.company_id, + count(DISTINCT store_app_sdk_strings_2025_h2.store_app) AS h2_app_count + FROM (adtech.store_app_sdk_strings_2025_h2 + JOIN adtech.sdks sd ON ((store_app_sdk_strings_2025_h2.sdk_id = sd.id))) + WHERE (store_app_sdk_strings_2025_h2.store_app IN ( SELECT common_apps.store_app + FROM common_apps)) + GROUP BY sd.company_id + ), comb AS ( + SELECT COALESCE(h1.company_id, h2.company_id) AS sdk_company_id, + ( SELECT count(*) AS count + FROM common_apps) AS total_app_count, + h1.h1_app_count, + h2.h2_app_count, + (h2.h2_app_count - h1.h1_app_count) AS net_migration, + round((((h2.h2_app_count)::numeric / (h1.h1_app_count)::numeric) - (1)::numeric), 4) AS round + FROM (h1_stats h1 + FULL JOIN h2_stats h2 ON ((h1.company_id = h2.company_id))) + ) + SELECT co.sdk_company_id, + co.total_app_count, + co.h1_app_count, + co.h2_app_count, + co.net_migration, + co.round, + d.domain_name AS company_domain + FROM ((comb co + LEFT JOIN adtech.companies c ON ((co.sdk_company_id = c.id))) + LEFT JOIN public.domains d ON ((d.id = c.domain_id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.company_shares_2025_common OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict 36rDwaOswJ66Zmojq1UGmtAOHAeRfVVTthUAvhW73npNCvr965iXXBiMBRpcPjU + diff --git a/pg-ddl/schema/adtech/sdk_categories.sql b/pg-ddl/schema/adtech/sdk_categories.sql index bcb7ae92..2cb9a3ef 100644 --- a/pg-ddl/schema/adtech/sdk_categories.sql +++ b/pg-ddl/schema/adtech/sdk_categories.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict H8iBedwajOoL4vbCaoLqRWpbzCcmamBUVlcLBnOemu4CBDIq4MgqJf2oMw6vi0Y +\restrict xhU1gNJajxnXUp0JTTkjEhHDvuztDQxKFhcPeH5e7WnWP6IPKCr2zpgedHUDYLO --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump 
version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -63,5 +63,5 @@ ALTER TABLE ONLY adtech.sdk_categories -- PostgreSQL database dump complete -- -\unrestrict H8iBedwajOoL4vbCaoLqRWpbzCcmamBUVlcLBnOemu4CBDIq4MgqJf2oMw6vi0Y +\unrestrict xhU1gNJajxnXUp0JTTkjEhHDvuztDQxKFhcPeH5e7WnWP6IPKCr2zpgedHUDYLO diff --git a/pg-ddl/schema/adtech/sdk_mediation_patterns.sql b/pg-ddl/schema/adtech/sdk_mediation_patterns.sql new file mode 100644 index 00000000..873e8faa --- /dev/null +++ b/pg-ddl/schema/adtech/sdk_mediation_patterns.sql @@ -0,0 +1,59 @@ +-- +-- PostgreSQL database dump +-- + +\restrict fm7YAh5TcWp40gsOQqhFfzTievV6PqCfmircIx1YPOenaZM3Jh2RA1BiyF5bPU8 + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: sdk_mediation_patterns; Type: TABLE; Schema: adtech; Owner: postgres +-- + +CREATE TABLE adtech.sdk_mediation_patterns ( + sdk_id integer NOT NULL, + mediation_pattern character varying(255) NOT NULL +); + + +ALTER TABLE adtech.sdk_mediation_patterns OWNER TO postgres; + +-- +-- Name: sdk_mediation_patterns company_mediation_patterns_pkey; Type: CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.sdk_mediation_patterns + ADD CONSTRAINT company_mediation_patterns_pkey PRIMARY KEY (sdk_id, mediation_pattern); + + +-- +-- Name: 
sdk_mediation_patterns company_mediation_patterns_company_id_fkey; Type: FK CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.sdk_mediation_patterns + ADD CONSTRAINT company_mediation_patterns_company_id_fkey FOREIGN KEY (sdk_id) REFERENCES adtech.sdks(id); + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict fm7YAh5TcWp40gsOQqhFfzTievV6PqCfmircIx1YPOenaZM3Jh2RA1BiyF5bPU8 + diff --git a/pg-ddl/schema/adtech/sdk_packages.sql b/pg-ddl/schema/adtech/sdk_packages.sql index dc6e07e5..ccc1c600 100644 --- a/pg-ddl/schema/adtech/sdk_packages.sql +++ b/pg-ddl/schema/adtech/sdk_packages.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 1ZSSRKQMYQ2QlEITzdE6HLvxOJCOkaZnaseJMIin8G6qhPdlER5DbM8S5obfcXp +\restrict Yz6EL5HG3w79aM83op8RuJfyRgcJs4UovzGt0NGdcasgZ0TJacVJxAtFq8hrt0N --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -99,5 +99,5 @@ ALTER TABLE ONLY adtech.sdk_packages -- PostgreSQL database dump complete -- -\unrestrict 1ZSSRKQMYQ2QlEITzdE6HLvxOJCOkaZnaseJMIin8G6qhPdlER5DbM8S5obfcXp +\unrestrict Yz6EL5HG3w79aM83op8RuJfyRgcJs4UovzGt0NGdcasgZ0TJacVJxAtFq8hrt0N diff --git a/pg-ddl/schema/adtech/sdk_paths.sql b/pg-ddl/schema/adtech/sdk_paths.sql index 110698d0..9865a13c 100644 --- a/pg-ddl/schema/adtech/sdk_paths.sql +++ b/pg-ddl/schema/adtech/sdk_paths.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict f6og5qMazywBdB4c14z6ZO2vkxrJJsxxHg3IJ9RbIiEKZR8DvIesWNI7cE3b9LJ +\restrict iA4rKoYtKOpwYT3xNONcbVw0DpMvP9rpNu4bjQMyhcbNurUdFsy8OFnPKWf0CFi --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump 
version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -92,5 +92,5 @@ ALTER TABLE ONLY adtech.sdk_paths -- PostgreSQL database dump complete -- -\unrestrict f6og5qMazywBdB4c14z6ZO2vkxrJJsxxHg3IJ9RbIiEKZR8DvIesWNI7cE3b9LJ +\unrestrict iA4rKoYtKOpwYT3xNONcbVw0DpMvP9rpNu4bjQMyhcbNurUdFsy8OFnPKWf0CFi diff --git a/pg-ddl/schema/adtech/sdk_strings__matview.sql b/pg-ddl/schema/adtech/sdk_strings__matview.sql new file mode 100644 index 00000000..bf5a7e8f --- /dev/null +++ b/pg-ddl/schema/adtech/sdk_strings__matview.sql @@ -0,0 +1,79 @@ +-- +-- PostgreSQL database dump +-- + +\restrict iS1rsl6XCay9C2wCIrWnuWUet947LYjpGEKLvu7BZReSHLVrJm65tb9dVbdxBlc + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: sdk_strings; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.sdk_strings AS + WITH matched_value_patterns AS ( + SELECT DISTINCT lower(vd.value_name) AS value_name_lower, + sp.sdk_id + FROM (public.version_strings vd + JOIN adtech.sdk_packages sp ON ((lower(vd.value_name) ~~ (lower((sp.package_pattern)::text) || '%'::text)))) + ), matched_path_patterns AS ( + SELECT DISTINCT lower(vd.xml_path) AS xml_path_lower, + ptm.sdk_id + FROM (public.version_strings vd + JOIN adtech.sdk_paths ptm ON ((lower(vd.xml_path) = lower((ptm.path_pattern)::text)))) + ), mediation_strings AS ( + SELECT vs.id AS version_string_id, + cmp.sdk_id, + lower(vs.value_name) AS 
value_name_lower + FROM (public.version_strings vs + JOIN adtech.sdk_mediation_patterns cmp ON ((lower(vs.value_name) ~~ (lower(concat((cmp.mediation_pattern)::text, '.')) || '%'::text)))) + ) + SELECT vs.id AS version_string_id, + mp.sdk_id + FROM (matched_value_patterns mp + JOIN public.version_strings vs ON ((lower(vs.value_name) = mp.value_name_lower))) +UNION + SELECT vs.id AS version_string_id, + mp.sdk_id + FROM (matched_path_patterns mp + JOIN public.version_strings vs ON ((lower(vs.xml_path) = mp.xml_path_lower))) +UNION + SELECT vs.id AS version_string_id, + ms.sdk_id + FROM (mediation_strings ms + JOIN public.version_strings vs ON ((lower(vs.value_name) = ms.value_name_lower))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.sdk_strings OWNER TO postgres; + +-- +-- Name: sdk_strings_version_string_id_sdk_id_idx; Type: INDEX; Schema: adtech; Owner: postgres +-- + +CREATE UNIQUE INDEX sdk_strings_version_string_id_sdk_id_idx ON adtech.sdk_strings USING btree (version_string_id, sdk_id); + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict iS1rsl6XCay9C2wCIrWnuWUet947LYjpGEKLvu7BZReSHLVrJm65tb9dVbdxBlc + diff --git a/pg-ddl/schema/adtech/sdks.sql b/pg-ddl/schema/adtech/sdks.sql index 3b71e364..79e732f8 100644 --- a/pg-ddl/schema/adtech/sdks.sql +++ b/pg-ddl/schema/adtech/sdks.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 1Wy4VJZ2m4bnnSnPeJVYIIl5JPWGso9ThcdQU8PZaikkz6VbkTgkLOkkDUN8KeO +\restrict iJB3OkhmCNQqm23cQ8MdmXzFRJYTh9Zfqf13VNXicI5A3KqnNEyNwS8Y3XF25NE --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -92,5 +92,5 @@ ALTER TABLE ONLY adtech.sdks -- PostgreSQL database dump complete -- -\unrestrict 1Wy4VJZ2m4bnnSnPeJVYIIl5JPWGso9ThcdQU8PZaikkz6VbkTgkLOkkDUN8KeO 
+\unrestrict iJB3OkhmCNQqm23cQ8MdmXzFRJYTh9Zfqf13VNXicI5A3KqnNEyNwS8Y3XF25NE diff --git a/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h1__matview.sql b/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h1__matview.sql new file mode 100644 index 00000000..540b9510 --- /dev/null +++ b/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h1__matview.sql @@ -0,0 +1,59 @@ +-- +-- PostgreSQL database dump +-- + +\restrict SwTY8iw4a11mvNlr40ibgnWCU2IhKJnYHKN8NJM5YkNUBWJaLj2LXAR18UxRF6Y + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: store_app_sdk_strings_2025_h1; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h1 AS + WITH latest_version_codes AS ( + SELECT DISTINCT ON (vc_1.store_app) vc_1.id, + vc_1.store_app, + vc_1.version_code, + vc_1.updated_at, + vc_1.crawl_result + FROM (public.version_codes vc_1 + JOIN public.version_code_sdk_scan_results vcssr ON ((vc_1.id = vcssr.version_code_id))) + WHERE ((vcssr.scan_result = 1) AND (vc_1.updated_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (vc_1.updated_at < '2025-07-01 00:00:00'::timestamp without time zone)) + ORDER BY vc_1.store_app, (string_to_array((vc_1.version_code)::text, '.'::text))::bigint[] DESC + ) + SELECT vc.store_app, + vdm.string_id AS version_string_id, + sd.id AS sdk_id + FROM (((latest_version_codes vc + JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) + 
JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) + JOIN adtech.sdks sd ON ((css.sdk_id = sd.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h1 OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict SwTY8iw4a11mvNlr40ibgnWCU2IhKJnYHKN8NJM5YkNUBWJaLj2LXAR18UxRF6Y + diff --git a/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h2__matview.sql b/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h2__matview.sql new file mode 100644 index 00000000..8dc1d203 --- /dev/null +++ b/pg-ddl/schema/adtech/store_app_sdk_strings_2025_h2__matview.sql @@ -0,0 +1,59 @@ +-- +-- PostgreSQL database dump +-- + +\restrict kn7yFKhPxeb65V4eDNto5dk2e98OcAMzfQlg5Egqrfy9SjgbHGvx1lJkrd3quag + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: store_app_sdk_strings_2025_h2; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h2 AS + WITH latest_version_codes AS ( + SELECT DISTINCT ON (vc_1.store_app) vc_1.id, + vc_1.store_app, + vc_1.version_code, + vc_1.updated_at, + vc_1.crawl_result + FROM (public.version_codes vc_1 + JOIN public.version_code_sdk_scan_results vcssr ON ((vc_1.id = vcssr.version_code_id))) + WHERE ((vcssr.scan_result = 1) AND (vc_1.updated_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (vc_1.updated_at < '2026-01-01 00:00:00'::timestamp without time zone)) + ORDER BY 
vc_1.store_app, (string_to_array((vc_1.version_code)::text, '.'::text))::bigint[] DESC + ) + SELECT vc.store_app, + vdm.string_id AS version_string_id, + sd.id AS sdk_id + FROM (((latest_version_codes vc + JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) + JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) + JOIN adtech.sdks sd ON ((css.sdk_id = sd.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h2 OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict kn7yFKhPxeb65V4eDNto5dk2e98OcAMzfQlg5Egqrfy9SjgbHGvx1lJkrd3quag + diff --git a/pg-ddl/schema/adtech/store_app_sdk_strings__matview.sql b/pg-ddl/schema/adtech/store_app_sdk_strings__matview.sql index 9107ca61..f142e46b 100644 --- a/pg-ddl/schema/adtech/store_app_sdk_strings__matview.sql +++ b/pg-ddl/schema/adtech/store_app_sdk_strings__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict m4dBlgO9HICEjRXgAYnqViOhBfdhbrYspF2FHY4lGzRurPgIhtIVlBPYIkrwrGj +\restrict HWhDPwvJ6b1G6zwdbnT40zitayXYF1H19bGLfzm84m3nc79FEg1Sy96eJHgRWhB --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -41,10 +41,10 @@ CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings AS ) SELECT vc.store_app, vdm.string_id AS version_string_id, - css.company_id + css.sdk_id FROM ((latest_version_codes vc JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) - LEFT JOIN adtech.company_sdk_strings css ON ((vdm.string_id = css.version_string_id))) + LEFT JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) WITH NO DATA; @@ -54,12 +54,12 @@ ALTER MATERIALIZED VIEW adtech.store_app_sdk_strings OWNER TO postgres; -- Name: store_app_sdk_strings_idx; 
Type: INDEX; Schema: adtech; Owner: postgres -- -CREATE UNIQUE INDEX store_app_sdk_strings_idx ON adtech.store_app_sdk_strings USING btree (store_app, version_string_id, company_id); +CREATE UNIQUE INDEX store_app_sdk_strings_idx ON adtech.store_app_sdk_strings USING btree (store_app, version_string_id, sdk_id); -- -- PostgreSQL database dump complete -- -\unrestrict m4dBlgO9HICEjRXgAYnqViOhBfdhbrYspF2FHY4lGzRurPgIhtIVlBPYIkrwrGj +\unrestrict HWhDPwvJ6b1G6zwdbnT40zitayXYF1H19bGLfzm84m3nc79FEg1Sy96eJHgRWhB diff --git a/pg-ddl/schema/adtech/url_redirect_chains.sql b/pg-ddl/schema/adtech/url_redirect_chains.sql index 2ef9babb..c607789d 100644 --- a/pg-ddl/schema/adtech/url_redirect_chains.sql +++ b/pg-ddl/schema/adtech/url_redirect_chains.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Xy8fNrmNyu6kLCkMfeRe9AH0T96jffY3y47pybAfLozPMQ3RvV8A3dYVWxDwO9I +\restrict ABc5s5Gg85to0fbEcESJ9PSfuKF2B8H3UJy79eGaoCVGVYnHt6qRa755EhcVew2 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -122,5 +122,5 @@ ALTER TABLE ONLY adtech.url_redirect_chains -- PostgreSQL database dump complete -- -\unrestrict Xy8fNrmNyu6kLCkMfeRe9AH0T96jffY3y47pybAfLozPMQ3RvV8A3dYVWxDwO9I +\unrestrict ABc5s5Gg85to0fbEcESJ9PSfuKF2B8H3UJy79eGaoCVGVYnHt6qRa755EhcVew2 diff --git a/pg-ddl/schema/adtech/urls.sql b/pg-ddl/schema/adtech/urls.sql index d10826df..26e90391 100644 --- a/pg-ddl/schema/adtech/urls.sql +++ b/pg-ddl/schema/adtech/urls.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Y2z0az3xp4sPDBTaNmZbrPFkAP2y89c3oIsiUw7IcRSglYzWIGI7gAenSayy1aS +\restrict C18RinZKBEhKVEN9aJp3qxqo2zZoq73ruP940SH047N0ko3qgwzi38b191zq8JL --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 
(Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -30,12 +30,12 @@ SET default_table_access_method = heap; CREATE TABLE adtech.urls ( id integer NOT NULL, url text NOT NULL, - url_hash character(32) GENERATED ALWAYS AS (md5(url)) STORED, domain_id integer, scheme text NOT NULL, is_deep_link boolean GENERATED ALWAYS AS ((scheme <> ALL (ARRAY['http'::text, 'https'::text, 'ftp'::text]))) STORED, created_at timestamp with time zone DEFAULT now(), - hostname text + hostname text, + url_hash character(32) ); @@ -93,10 +93,10 @@ CREATE INDEX idx_urls_scheme ON adtech.urls USING btree (scheme); -- --- Name: urls_idx; Type: INDEX; Schema: adtech; Owner: postgres +-- Name: urls_url_hash_idx; Type: INDEX; Schema: adtech; Owner: postgres -- -CREATE UNIQUE INDEX urls_idx ON adtech.urls USING btree (md5(url)); +CREATE UNIQUE INDEX urls_url_hash_idx ON adtech.urls USING btree (url_hash); -- @@ -111,5 +111,5 @@ ALTER TABLE ONLY adtech.urls -- PostgreSQL database dump complete -- -\unrestrict Y2z0az3xp4sPDBTaNmZbrPFkAP2y89c3oIsiUw7IcRSglYzWIGI7gAenSayy1aS +\unrestrict C18RinZKBEhKVEN9aJp3qxqo2zZoq73ruP940SH047N0ko3qgwzi38b191zq8JL diff --git a/pg-ddl/schema/frontend/adstxt_ad_domain_overview__matview.sql b/pg-ddl/schema/frontend/adstxt_ad_domain_overview__matview.sql index 4200e671..58945578 100644 --- a/pg-ddl/schema/frontend/adstxt_ad_domain_overview__matview.sql +++ b/pg-ddl/schema/frontend/adstxt_ad_domain_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 4O7ELwYJwiFPPxoKD5w85HBy5zCKlbDhbRQQtktq0igxLBAlCi9VdJ8MrhrjAC0 +\restrict ci4IJBbZOQh22Fj6ydihw2i0K5Bwa9Hi0FsAFUT3pXAfpbI2gxL4wcDRdJcfNn0 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- 
Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -62,5 +62,5 @@ CREATE UNIQUE INDEX adstxt_ad_domain_overview_unique_idx ON frontend.adstxt_ad_d -- PostgreSQL database dump complete -- -\unrestrict 4O7ELwYJwiFPPxoKD5w85HBy5zCKlbDhbRQQtktq0igxLBAlCi9VdJ8MrhrjAC0 +\unrestrict ci4IJBbZOQh22Fj6ydihw2i0K5Bwa9Hi0FsAFUT3pXAfpbI2gxL4wcDRdJcfNn0 diff --git a/pg-ddl/schema/frontend/adstxt_entries_store_apps__matview.sql b/pg-ddl/schema/frontend/adstxt_entries_store_apps__matview.sql index 35147fe2..8a7f32e7 100644 --- a/pg-ddl/schema/frontend/adstxt_entries_store_apps__matview.sql +++ b/pg-ddl/schema/frontend/adstxt_entries_store_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ZbcdXlmbpbXmzYrW9UTVZNJwsI7PVaqhcoUNhYXcTxEFZkt1zb1S7Zse8mLM71U +\restrict chSYvFNcCru1bAcqWh470eToN81kboSyLQSh4BhB1YKCc4SHKhxqT0fzZFgrfJS --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -81,5 +81,5 @@ CREATE UNIQUE INDEX adstxt_entries_store_apps_unique_idx ON frontend.adstxt_entr -- PostgreSQL database dump complete -- -\unrestrict ZbcdXlmbpbXmzYrW9UTVZNJwsI7PVaqhcoUNhYXcTxEFZkt1zb1S7Zse8mLM71U +\unrestrict chSYvFNcCru1bAcqWh470eToN81kboSyLQSh4BhB1YKCc4SHKhxqT0fzZFgrfJS diff --git a/pg-ddl/schema/frontend/adstxt_publishers_overview__matview.sql b/pg-ddl/schema/frontend/adstxt_publishers_overview__matview.sql index 2639074d..520289c9 100644 --- a/pg-ddl/schema/frontend/adstxt_publishers_overview__matview.sql +++ b/pg-ddl/schema/frontend/adstxt_publishers_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict jkIjarrQh9petuCRFvLwmbgewRzgaH9yz7qH5pqNYBI9bRepkhQzD4Lfjg0HsFO +\restrict 
BE6pIiWPPnjXcQwomytG5bBpoXz34NF6HSfNJZlSYK0MDXRO82bZtX19tlVymDc --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -74,5 +74,5 @@ CREATE UNIQUE INDEX adstxt_publishers_overview_ad_domain_unique_idx ON frontend. -- PostgreSQL database dump complete -- -\unrestrict jkIjarrQh9petuCRFvLwmbgewRzgaH9yz7qH5pqNYBI9bRepkhQzD4Lfjg0HsFO +\unrestrict BE6pIiWPPnjXcQwomytG5bBpoXz34NF6HSfNJZlSYK0MDXRO82bZtX19tlVymDc diff --git a/pg-ddl/schema/frontend/advertiser_creative_rankings__matview.sql b/pg-ddl/schema/frontend/advertiser_creative_rankings__matview.sql index 4f07795f..4ce52a00 100644 --- a/pg-ddl/schema/frontend/advertiser_creative_rankings__matview.sql +++ b/pg-ddl/schema/frontend/advertiser_creative_rankings__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict oImMmxaGqRM9o0BiFDhkKpy8XzZq75AvtyhIJ0Cn1apBUmwIjYAFuj6vGxz9JVe +\restrict LOdxF0TTVceB8Wf5I8qazqs37h7RakaVsxkHDkRIex33VVqQl9KOQApsycE6Yyc --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -107,5 +107,5 @@ ALTER MATERIALIZED VIEW frontend.advertiser_creative_rankings OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict oImMmxaGqRM9o0BiFDhkKpy8XzZq75AvtyhIJ0Cn1apBUmwIjYAFuj6vGxz9JVe +\unrestrict LOdxF0TTVceB8Wf5I8qazqs37h7RakaVsxkHDkRIex33VVqQl9KOQApsycE6Yyc diff --git a/pg-ddl/schema/frontend/advertiser_creative_rankings_recent_month__matview.sql b/pg-ddl/schema/frontend/advertiser_creative_rankings_recent_month__matview.sql index c2be95c0..783d6266 100644 --- 
a/pg-ddl/schema/frontend/advertiser_creative_rankings_recent_month__matview.sql +++ b/pg-ddl/schema/frontend/advertiser_creative_rankings_recent_month__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict cuHRms1GxJ3abBK192Q489msTIVlKljtbvuUhBodhrS8VdvMJIOD2ctPL4tAqh0 +\restrict cUwQj5uttwo6ClQebjpY2CdxPuhMwGDptStOK6WjhNnPAkA31zUaLvCpjYa9qLP --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -114,5 +114,5 @@ ALTER MATERIALIZED VIEW frontend.advertiser_creative_rankings_recent_month OWNER -- PostgreSQL database dump complete -- -\unrestrict cuHRms1GxJ3abBK192Q489msTIVlKljtbvuUhBodhrS8VdvMJIOD2ctPL4tAqh0 +\unrestrict cUwQj5uttwo6ClQebjpY2CdxPuhMwGDptStOK6WjhNnPAkA31zUaLvCpjYa9qLP diff --git a/pg-ddl/schema/frontend/advertiser_creatives__matview.sql b/pg-ddl/schema/frontend/advertiser_creatives__matview.sql index c600d05f..70244918 100644 --- a/pg-ddl/schema/frontend/advertiser_creatives__matview.sql +++ b/pg-ddl/schema/frontend/advertiser_creatives__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict PpD8JCM7dLhNgUZDhTBgVWuTv2VKPEair2rrg7xtqnnNWeM1J9EuE5zvuPNQ9VU +\restrict aPU1W9ZeRLUpBbqpoqnS0UoMcxysH1unH9sOF2SIKTHXBQUI5OcWkKnF0asFZ5p --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -80,5 +80,5 @@ ALTER MATERIALIZED VIEW frontend.advertiser_creatives OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict PpD8JCM7dLhNgUZDhTBgVWuTv2VKPEair2rrg7xtqnnNWeM1J9EuE5zvuPNQ9VU +\unrestrict 
aPU1W9ZeRLUpBbqpoqnS0UoMcxysH1unH9sOF2SIKTHXBQUI5OcWkKnF0asFZ5p diff --git a/pg-ddl/schema/frontend/api_call_countries__matview.sql b/pg-ddl/schema/frontend/api_call_countries__matview.sql index 3a2f5299..a11e308b 100644 --- a/pg-ddl/schema/frontend/api_call_countries__matview.sql +++ b/pg-ddl/schema/frontend/api_call_countries__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict aFLd6gUYDYpTqQc9iosIiKqp9fgIGZdL5C6xNQoIMolsLJLdVgbw63aUxQhAHrQ +\restrict GDRs3Py58MI8rnet9oKvef3T2SUlQbqQGwOXxQa6N3bt1DiOmexFfgBG3A0Hsc2 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -87,5 +87,5 @@ CREATE UNIQUE INDEX api_call_countries_unique ON frontend.api_call_countries USI -- PostgreSQL database dump complete -- -\unrestrict aFLd6gUYDYpTqQc9iosIiKqp9fgIGZdL5C6xNQoIMolsLJLdVgbw63aUxQhAHrQ +\unrestrict GDRs3Py58MI8rnet9oKvef3T2SUlQbqQGwOXxQa6N3bt1DiOmexFfgBG3A0Hsc2 diff --git a/pg-ddl/schema/frontend/app_keyword_rank_stats__matview.sql b/pg-ddl/schema/frontend/app_keyword_rank_stats__matview.sql new file mode 100644 index 00000000..aa64592e --- /dev/null +++ b/pg-ddl/schema/frontend/app_keyword_rank_stats__matview.sql @@ -0,0 +1,76 @@ +-- +-- PostgreSQL database dump +-- + +\restrict P6gz6Q3tRD2X6biy9ZDcn3vnG6hYGp6vsE9dWdYS5oHDEprU0HWjRyOSh42x7vY + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET 
client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: app_keyword_rank_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.app_keyword_rank_stats AS + WITH latest_per_country AS ( + SELECT app_keyword_ranks_daily.country, + max(app_keyword_ranks_daily.crawled_date) AS max_crawled_date + FROM frontend.app_keyword_ranks_daily + GROUP BY app_keyword_ranks_daily.country + ), d30_keywords AS ( + SELECT akr.country, + akr.store_app, + akr.keyword_id, + min(akr.app_rank) AS d30_best_rank + FROM frontend.app_keyword_ranks_daily akr + WHERE (akr.crawled_date >= (CURRENT_DATE - '30 days'::interval)) + GROUP BY akr.country, akr.store_app, akr.keyword_id + ), latest_ranks AS ( + SELECT kr.country, + kr.store_app, + kr.keyword_id, + kr.app_rank AS latest_app_rank + FROM (frontend.app_keyword_ranks_daily kr + JOIN latest_per_country lpc ON (((kr.country = lpc.country) AND (kr.crawled_date = lpc.max_crawled_date)))) + ), all_ranked_keywords AS ( + SELECT rk.country, + rk.store_app, + rk.keyword_id, + rk.d30_best_rank, + lk.latest_app_rank + FROM (d30_keywords rk + LEFT JOIN latest_ranks lk ON (((lk.country = rk.country) AND (lk.store_app = rk.store_app) AND (lk.keyword_id = rk.keyword_id)))) + ) + SELECT country, + store_app, + keyword_id, + d30_best_rank, + latest_app_rank + FROM all_ranked_keywords + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.app_keyword_rank_stats OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict P6gz6Q3tRD2X6biy9ZDcn3vnG6hYGp6vsE9dWdYS5oHDEprU0HWjRyOSh42x7vY + diff --git a/pg-ddl/schema/frontend/app_keyword_ranks_daily.sql b/pg-ddl/schema/frontend/app_keyword_ranks_daily.sql index 77d709f6..7555b1e2 100644 --- a/pg-ddl/schema/frontend/app_keyword_ranks_daily.sql +++ b/pg-ddl/schema/frontend/app_keyword_ranks_daily.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- 
-\restrict ZOGd1Icm0aDKNJxSwQxA7vAwjYaHMlrg1ye5PXWD4knlQf42hLOZyQgKNDEFTdy +\restrict rIqzHT9qLe9TFzPXbIEOzHZ5gxoyjW43xxF2N90s3HVEDXiPdmjEIcLUc7AJDIj --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -29,10 +29,11 @@ SET default_table_access_method = heap; CREATE TABLE frontend.app_keyword_ranks_daily ( crawled_date date NOT NULL, + store smallint NOT NULL, country smallint NOT NULL, - app_rank smallint NOT NULL, keyword_id integer NOT NULL, - store_app integer NOT NULL + store_app integer NOT NULL, + app_rank smallint NOT NULL ); @@ -43,7 +44,7 @@ ALTER TABLE frontend.app_keyword_ranks_daily OWNER TO postgres; -- ALTER TABLE ONLY frontend.app_keyword_ranks_daily - ADD CONSTRAINT app_keyword_rankings_unique_test UNIQUE (crawled_date, country, keyword_id, app_rank); + ADD CONSTRAINT app_keyword_rankings_unique_test UNIQUE (crawled_date, store, country, keyword_id, app_rank); -- @@ -60,9 +61,48 @@ CREATE INDEX app_keyword_ranks_daily_app_lookup ON frontend.app_keyword_ranks_da CREATE INDEX app_keyword_ranks_daily_date ON frontend.app_keyword_ranks_daily USING btree (crawled_date); +-- +-- Name: app_keywords_delete_and_insert_on; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX app_keywords_delete_and_insert_on ON frontend.app_keyword_ranks_daily USING btree (crawled_date, store); + + +-- +-- Name: app_keyword_ranks_daily country_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT country_kr_fk FOREIGN KEY (country) REFERENCES public.countries(id); + + +-- +-- Name: app_keyword_ranks_daily keyword_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY 
frontend.app_keyword_ranks_daily + ADD CONSTRAINT keyword_kr_fk FOREIGN KEY (keyword_id) REFERENCES public.keywords(id); + + +-- +-- Name: app_keyword_ranks_daily store_app_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT store_app_kr_fk FOREIGN KEY (store_app) REFERENCES public.store_apps(id); + + +-- +-- Name: app_keyword_ranks_daily store_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT store_kr_fk FOREIGN KEY (store) REFERENCES public.stores(id); + + -- -- PostgreSQL database dump complete -- -\unrestrict ZOGd1Icm0aDKNJxSwQxA7vAwjYaHMlrg1ye5PXWD4knlQf42hLOZyQgKNDEFTdy +\unrestrict rIqzHT9qLe9TFzPXbIEOzHZ5gxoyjW43xxF2N90s3HVEDXiPdmjEIcLUc7AJDIj diff --git a/pg-ddl/schema/frontend/apps_new_monthly__matview.sql b/pg-ddl/schema/frontend/apps_new_monthly__matview.sql index b6ed01a9..8ab91015 100644 --- a/pg-ddl/schema/frontend/apps_new_monthly__matview.sql +++ b/pg-ddl/schema/frontend/apps_new_monthly__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict tbCdjCylwLafDiFcIUwYMlYs7aS2KF6sijqF3iOw5S5Z3FJDQJpNfXbCg2PcpIP +\restrict JalSjLBAWboY1JBLBl0pFH0h7ji6XnVSFBBZ0AhQKP4ZwYceM6E6neS012mzr5e --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -34,6 +34,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -46,7 +47,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS sa_1.created_at, sa_1.updated_at, sa_1.crawl_result, - sa_1.icon_url_512, + sa_1.icon_url_100, sa_1.release_date, sa_1.rating_count, 
sa_1.featured_image_url, @@ -60,39 +61,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '30 days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '45 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - ra.rating_count, - ra.featured_image_url, - ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - WHERE (ra.rn <= 100) + SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -116,5 +116,5 @@ CREATE UNIQUE INDEX idx_apps_new_monthly ON frontend.apps_new_monthly USING btre -- PostgreSQL database dump complete -- -\unrestrict tbCdjCylwLafDiFcIUwYMlYs7aS2KF6sijqF3iOw5S5Z3FJDQJpNfXbCg2PcpIP +\unrestrict JalSjLBAWboY1JBLBl0pFH0h7ji6XnVSFBBZ0AhQKP4ZwYceM6E6neS012mzr5e diff --git a/pg-ddl/schema/frontend/apps_new_weekly__matview.sql b/pg-ddl/schema/frontend/apps_new_weekly__matview.sql index f82e4f41..49dae087 100644 --- 
a/pg-ddl/schema/frontend/apps_new_weekly__matview.sql +++ b/pg-ddl/schema/frontend/apps_new_weekly__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict nH2bJrFcq5CbH6U8vDpRmzCLcTUMlyoUdxQNAfBkTnlmFZI1i5KnBgactkrOTzp +\restrict 03TfSZ7r4S7rVgvGfn6tfLnrv96Quqq4rMewRPUQgXNzxam54LmvXHw5k9vFeC1 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -34,6 +34,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -46,7 +47,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS sa_1.created_at, sa_1.updated_at, sa_1.crawl_result, - sa_1.icon_url_512, + sa_1.icon_url_100, sa_1.release_date, sa_1.rating_count, sa_1.featured_image_url, @@ -60,39 +61,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '7 days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '11 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - ra.rating_count, - ra.featured_image_url, - ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - WHERE (ra.rn <= 100) + 
SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -123,5 +123,5 @@ CREATE UNIQUE INDEX idx_apps_new_weekly_f ON frontend.apps_new_weekly USING btre -- PostgreSQL database dump complete -- -\unrestrict nH2bJrFcq5CbH6U8vDpRmzCLcTUMlyoUdxQNAfBkTnlmFZI1i5KnBgactkrOTzp +\unrestrict 03TfSZ7r4S7rVgvGfn6tfLnrv96Quqq4rMewRPUQgXNzxam54LmvXHw5k9vFeC1 diff --git a/pg-ddl/schema/frontend/apps_new_yearly__matview.sql b/pg-ddl/schema/frontend/apps_new_yearly__matview.sql index fc7d8b9a..1ffebbdf 100644 --- a/pg-ddl/schema/frontend/apps_new_yearly__matview.sql +++ b/pg-ddl/schema/frontend/apps_new_yearly__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 13RXLUNgzUcQJWIkUyFvr7r8zfCV5hQiLOSUqWSkmFCUM2IF6qrR62g0TRDVkuU +\restrict BBPxcJyXVGPj4eUx8gqOEY2yWUmZ3Awc5KaBjLXjdMnN9M6Z38XqeoWVjkeJWvm --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -34,6 +34,8 @@ CREATE MATERIALIZED VIEW frontend.apps_new_yearly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, + sa_1.icon_url_100, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -60,39 +62,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_yearly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '365 
days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '380 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - ra.rating_count, - ra.featured_image_url, - ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - WHERE (ra.rn <= 100) + SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -116,5 +117,5 @@ CREATE UNIQUE INDEX idx_apps_new_yearly ON frontend.apps_new_yearly USING btree -- PostgreSQL database dump complete -- -\unrestrict 13RXLUNgzUcQJWIkUyFvr7r8zfCV5hQiLOSUqWSkmFCUM2IF6qrR62g0TRDVkuU +\unrestrict BBPxcJyXVGPj4eUx8gqOEY2yWUmZ3Awc5KaBjLXjdMnN9M6Z38XqeoWVjkeJWvm diff --git a/pg-ddl/schema/frontend/category_tag_stats__matview.sql b/pg-ddl/schema/frontend/category_tag_stats__matview.sql index cde8203d..efdb5013 100644 --- a/pg-ddl/schema/frontend/category_tag_stats__matview.sql +++ b/pg-ddl/schema/frontend/category_tag_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
W3ZBBkZsIYC80fbuxDqkNUZSde9YoGFprPNiQ54I831I9v3ARXErGMYFOncwSJT +\restrict LASz3sDy0wCNfO0ymxaE0OeYa7z5bxjuXDCsvTgvPhkVtFFBP2eAfM6pfjmEdNz --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,36 +28,22 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - tag.tag_source, - sa.installs, - sa.rating_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + WITH distinct_apps_group AS ( + SELECT DISTINCT csac.store_app, + tag.tag_source + FROM (adtech.combined_store_apps_companies csac CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) WHERE (tag.present IS TRUE) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.tag_source, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM 
(distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source WITH NO DATA; @@ -74,5 +60,5 @@ CREATE UNIQUE INDEX idx_category_tag_stats ON frontend.category_tag_stats USING -- PostgreSQL database dump complete -- -\unrestrict W3ZBBkZsIYC80fbuxDqkNUZSde9YoGFprPNiQ54I831I9v3ARXErGMYFOncwSJT +\unrestrict LASz3sDy0wCNfO0ymxaE0OeYa7z5bxjuXDCsvTgvPhkVtFFBP2eAfM6pfjmEdNz diff --git a/pg-ddl/schema/frontend/category_tag_type_stats__matview.sql b/pg-ddl/schema/frontend/category_tag_type_stats__matview.sql new file mode 100644 index 00000000..275e41d5 --- /dev/null +++ b/pg-ddl/schema/frontend/category_tag_type_stats__matview.sql @@ -0,0 +1,114 @@ +-- +-- PostgreSQL database dump +-- + +\restrict BDYVeqQAbV5Wag6iNnxaOseoJR6TxFmv1rzFBaCwfAfbYR8FWu5AdhA2cBQb4yY + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: category_tag_type_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.category_tag_type_stats AS + WITH minimized_company_categories AS ( + SELECT company_categories.company_id, + min(company_categories.category_id) AS category_id + FROM adtech.company_categories + GROUP BY company_categories.company_id + ), api_and_app_ads AS ( + SELECT x.store, + x.app_category, + 
x.tag_source, + x.type_url_slug, + count(*) AS app_count, + sum(x.installs_sum_4w_est) AS installs_d30, + sum(x.installs_est) AS installs_total + FROM ( SELECT DISTINCT csac.store_app, + sa.store, + csac.app_category, + tag.tag_source, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying + ELSE cats.url_slug + END AS type_url_slug, + sa.installs_sum_4w_est, + sa.installs_est + FROM ((((adtech.combined_store_apps_companies csac + LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + JOIN minimized_company_categories mcc ON ((csac.company_id = mcc.company_id))) + LEFT JOIN adtech.categories cats ON ((mcc.category_id = cats.id))) + CROSS JOIN LATERAL ( VALUES ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) + WHERE ((tag.present IS TRUE) AND (sa.id IS NOT NULL))) x + GROUP BY x.store, x.app_category, x.tag_source, x.type_url_slug + ), store_app_sdks AS ( + SELECT DISTINCT sass.store_app, + sass.sdk_id + FROM adtech.store_app_sdk_strings sass + WHERE (sass.sdk_id IS NOT NULL) + ), sdk_and_mediation AS ( + SELECT x.store, + x.app_category, + 'sdk'::text AS tag_source, + x.type_url_slug, + count(*) AS app_count, + sum(x.installs_sum_4w_est) AS installs_d30, + sum(x.installs_est) AS installs_total + FROM ( SELECT DISTINCT sas.store_app, + sa.store, + sa.category AS app_category, + cats.url_slug AS type_url_slug, + sa.installs_sum_4w_est, + sa.installs_est + FROM (((store_app_sdks sas + LEFT JOIN frontend.store_apps_overview sa ON ((sas.store_app = sa.id))) + LEFT JOIN adtech.sdk_categories sc ON ((sas.sdk_id = sc.sdk_id))) + LEFT JOIN adtech.categories cats ON ((sc.category_id = cats.id))) + WHERE (sa.id IS NOT NULL)) x + GROUP BY x.store, x.app_category, x.type_url_slug + ) + SELECT api_and_app_ads.store, + api_and_app_ads.app_category, + api_and_app_ads.tag_source, + api_and_app_ads.type_url_slug, + 
api_and_app_ads.app_count, + api_and_app_ads.installs_d30, + api_and_app_ads.installs_total + FROM api_and_app_ads +UNION ALL + SELECT sdk_and_mediation.store, + sdk_and_mediation.app_category, + sdk_and_mediation.tag_source, + sdk_and_mediation.type_url_slug, + sdk_and_mediation.app_count, + sdk_and_mediation.installs_d30, + sdk_and_mediation.installs_total + FROM sdk_and_mediation + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.category_tag_type_stats OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict BDYVeqQAbV5Wag6iNnxaOseoJR6TxFmv1rzFBaCwfAfbYR8FWu5AdhA2cBQb4yY + diff --git a/pg-ddl/schema/frontend/companies_apps_overview__matview.sql b/pg-ddl/schema/frontend/companies_apps_overview__matview.sql index 064f4df2..04f120ed 100644 --- a/pg-ddl/schema/frontend/companies_apps_overview__matview.sql +++ b/pg-ddl/schema/frontend/companies_apps_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict McWZsRSE6lmNsTrHOiO50dy5deqDGkyHVnhvb3uID5bdwTeWjdmia7yfTydUJi2 +\restrict CZKm0CGXlpImPPmMWoTyHc9mb5gNChQQDDKzTozgfJ4SxmyfD9eYfc0WKnicGE9 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,22 +28,24 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.companies_apps_overview AS - WITH store_app_sdk_companies AS ( + WITH store_app_sdk_company_category AS ( SELECT DISTINCT savs.store_app, - savs.company_id - FROM adtech.store_app_sdk_strings savs + sd.company_id, + sc.category_id + FROM ((adtech.store_app_sdk_strings savs + LEFT JOIN adtech.sdks sd ON ((savs.sdk_id = sd.id))) + JOIN adtech.sdk_categories sc ON ((savs.sdk_id = sc.sdk_id))) ) SELECT sa.store_id, sacs.company_id, c.name AS company_name, d.domain_name AS 
company_domain, cc2.url_slug AS category_slug - FROM (((((store_app_sdk_companies sacs + FROM ((((store_app_sdk_company_category sacs LEFT JOIN public.store_apps sa ON ((sacs.store_app = sa.id))) LEFT JOIN adtech.companies c ON ((sacs.company_id = c.id))) LEFT JOIN public.domains d ON ((c.domain_id = d.id))) - LEFT JOIN adtech.company_categories cc ON ((c.id = cc.company_id))) - LEFT JOIN adtech.categories cc2 ON ((cc.category_id = cc2.id))) + LEFT JOIN adtech.categories cc2 ON ((sacs.category_id = cc2.id))) WHERE (sacs.company_id IS NOT NULL) WITH NO DATA; @@ -68,5 +70,5 @@ CREATE UNIQUE INDEX companies_apps_overview_unique_idx ON frontend.companies_app -- PostgreSQL database dump complete -- -\unrestrict McWZsRSE6lmNsTrHOiO50dy5deqDGkyHVnhvb3uID5bdwTeWjdmia7yfTydUJi2 +\unrestrict CZKm0CGXlpImPPmMWoTyHc9mb5gNChQQDDKzTozgfJ4SxmyfD9eYfc0WKnicGE9 diff --git a/pg-ddl/schema/frontend/companies_category_stats__matview.sql b/pg-ddl/schema/frontend/companies_category_stats__matview.sql index be452cfe..9041c9f1 100644 --- a/pg-ddl/schema/frontend/companies_category_stats__matview.sql +++ b/pg-ddl/schema/frontend/companies_category_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Pyr6OgdmoiC2HfEDC7R5RJW75JLxc3bNnH28YtMlfK0gt3RTAW7U9nbhoT9elH1 +\restrict 44gPIVLi99vTWVpc9BvjR0tMSVsWNW17PWCCcgeVag8gNzNghPidp5dOXYRtWkT --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,37 +28,17 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.companies_category_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE 
((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - csac.ad_domain AS company_domain, - c.name AS company_name, - sa.installs, - sa.rating_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) - ) - SELECT dag.store, - dag.app_category, - dag.company_domain, - dag.company_name, - count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total - FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.company_domain, dag.company_name + SELECT sa.store, + sa.category AS app_category, + csac.ad_domain AS company_domain, + c.name AS company_name, + count(DISTINCT csac.store_app) AS app_count, + sum(sa.installs_est) AS installs_total, + sum(sa.installs_sum_4w_est) AS installs_d30 + FROM ((adtech.combined_store_apps_companies csac + LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) + LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + GROUP BY sa.store, sa.category, csac.ad_domain, c.name WITH NO DATA; @@ -82,5 +62,5 @@ CREATE INDEX companies_category_stats_query_idx ON frontend.companies_category_s -- PostgreSQL database dump complete -- -\unrestrict Pyr6OgdmoiC2HfEDC7R5RJW75JLxc3bNnH28YtMlfK0gt3RTAW7U9nbhoT9elH1 +\unrestrict 44gPIVLi99vTWVpc9BvjR0tMSVsWNW17PWCCcgeVag8gNzNghPidp5dOXYRtWkT diff --git a/pg-ddl/schema/frontend/companies_category_tag_stats__matview.sql b/pg-ddl/schema/frontend/companies_category_tag_stats__matview.sql index 54017c39..7ab83186 100644 --- 
a/pg-ddl/schema/frontend/companies_category_tag_stats__matview.sql +++ b/pg-ddl/schema/frontend/companies_category_tag_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 9Ot6J3fpm1qvlgOkMcxu1UbBcubcBIqCgkNMb3c1E0yghC6fMdoiahARJApdtYy +\restrict CYElgV2E3gN9AGY07Y8kDEvsYhV85bmkeMNRfm6XWq3PRCWVQRjoKZKTWNrKEVC --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,41 +28,28 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.companies_category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, + WITH distinct_apps_group AS ( + SELECT csac.store_app, csac.app_category, tag.tag_source, csac.ad_domain AS company_domain, - c.name AS company_name, - sa.installs, - sa.rating_count - FROM (((adtech.combined_store_apps_companies csac + c.name AS company_name + FROM ((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) WHERE (tag.present IS TRUE) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, 
dag.tag_source, dag.company_domain, dag.company_name, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source, dag.company_domain, dag.company_name + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source, dag.company_domain, dag.company_name WITH NO DATA; @@ -86,5 +73,5 @@ CREATE UNIQUE INDEX companies_category_tag_stats_idx ON frontend.companies_categ -- PostgreSQL database dump complete -- -\unrestrict 9Ot6J3fpm1qvlgOkMcxu1UbBcubcBIqCgkNMb3c1E0yghC6fMdoiahARJApdtYy +\unrestrict CYElgV2E3gN9AGY07Y8kDEvsYhV85bmkeMNRfm6XWq3PRCWVQRjoKZKTWNrKEVC diff --git a/pg-ddl/schema/frontend/companies_category_tag_type_stats__matview.sql b/pg-ddl/schema/frontend/companies_category_tag_type_stats__matview.sql index d650d06e..129e7d99 100644 --- a/pg-ddl/schema/frontend/companies_category_tag_type_stats__matview.sql +++ b/pg-ddl/schema/frontend/companies_category_tag_type_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 3L2ildO0psUAFfSVnAbwC2QssZ7mgrHiLXIHOzJ3H6pi6M6uMKGs7hmTAOZrnwf +\restrict VTvniZaod6uQ0cFaZj86PDwkDy2u9BlXtOhGKZIeoIBogW54ktvHz0gb6tfklMj --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,63 +28,96 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.companies_category_tag_type_stats AS - WITH 
d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app + WITH minimized_company_categories AS ( + SELECT company_categories.company_id, + min(company_categories.category_id) AS category_id + FROM adtech.company_categories + GROUP BY company_categories.company_id + ), api_and_app_ads AS ( + SELECT sa.store, + csac.app_category, + tag.tag_source, + csac.ad_domain AS company_domain, + c.name AS company_name, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying + ELSE cats.url_slug + END AS type_url_slug, + count(DISTINCT csac.store_app) AS app_count, + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total + FROM (((((adtech.combined_store_apps_companies csac + LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) + LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + LEFT JOIN minimized_company_categories mcc ON ((csac.company_id = mcc.company_id))) + LEFT JOIN adtech.categories cats ON ((mcc.category_id = cats.id))) + CROSS JOIN LATERAL ( VALUES ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) + WHERE (tag.present IS TRUE) + GROUP BY sa.store, csac.app_category, tag.tag_source, csac.ad_domain, c.name, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying + ELSE cats.url_slug + END + ), store_app_sdks AS ( + SELECT DISTINCT sass.store_app, + sass.sdk_id + FROM adtech.store_app_sdk_strings sass + WHERE (sass.sdk_id IS NOT NULL) + ), sdk_and_mediation AS ( + SELECT sa.store, + sa.category AS app_category, + 'sdk'::text AS tag_source, 
+ d.domain_name AS company_domain, + c.name AS company_name, + cats.url_slug AS type_url_slug, + count(DISTINCT sas.store_app) AS app_count, + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total + FROM ((((((store_app_sdks sas + LEFT JOIN adtech.sdks s ON ((sas.sdk_id = s.id))) + LEFT JOIN adtech.companies c ON ((s.company_id = c.id))) + LEFT JOIN public.domains d ON ((c.domain_id = d.id))) + LEFT JOIN frontend.store_apps_overview sa ON ((sas.store_app = sa.id))) + LEFT JOIN adtech.sdk_categories sc ON ((sas.sdk_id = sc.sdk_id))) + LEFT JOIN adtech.categories cats ON ((sc.category_id = cats.id))) + GROUP BY sa.store, sa.category, 'sdk'::text, d.domain_name, c.name, cats.url_slug ) - SELECT sa.store, - csac.app_category, - tag.tag_source, - csac.ad_domain AS company_domain, - c.name AS company_name, - CASE - WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying - ELSE cats.url_slug - END AS type_url_slug, - count(DISTINCT csac.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(sa.installs) AS installs_total, - sum(sa.rating_count) AS rating_count_total - FROM ((((((adtech.combined_store_apps_companies csac - LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) - LEFT JOIN d30_counts dc ON ((csac.store_app = dc.store_app))) - LEFT JOIN adtech.company_categories ccats ON ((csac.company_id = ccats.company_id))) - LEFT JOIN adtech.categories cats ON ((ccats.category_id = cats.id))) - CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) - WHERE (tag.present IS TRUE) - GROUP BY sa.store, csac.app_category, tag.tag_source, csac.ad_domain, c.name, - CASE - WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 
'ad-networks'::character varying - ELSE cats.url_slug - END + SELECT api_and_app_ads.store, + api_and_app_ads.app_category, + api_and_app_ads.tag_source, + api_and_app_ads.company_domain, + api_and_app_ads.company_name, + api_and_app_ads.type_url_slug, + api_and_app_ads.app_count, + api_and_app_ads.installs_d30, + api_and_app_ads.installs_total + FROM api_and_app_ads +UNION ALL + SELECT sdk_and_mediation.store, + sdk_and_mediation.app_category, + sdk_and_mediation.tag_source, + sdk_and_mediation.company_domain, + sdk_and_mediation.company_name, + sdk_and_mediation.type_url_slug, + sdk_and_mediation.app_count, + sdk_and_mediation.installs_d30, + sdk_and_mediation.installs_total + FROM sdk_and_mediation WITH NO DATA; ALTER MATERIALIZED VIEW frontend.companies_category_tag_type_stats OWNER TO postgres; -- --- Name: companies_category_tag_type_stats_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- Name: frontend_companies_category_tag_type_stats_unique; Type: INDEX; Schema: frontend; Owner: postgres -- -CREATE UNIQUE INDEX companies_category_tag_type_stats_idx ON frontend.companies_category_tag_type_stats USING btree (store, tag_source, app_category, company_domain, type_url_slug); - - --- --- Name: companies_category_tag_type_stats_query_idx; Type: INDEX; Schema: frontend; Owner: postgres --- - -CREATE INDEX companies_category_tag_type_stats_query_idx ON frontend.companies_category_tag_type_stats USING btree (type_url_slug, app_category); +CREATE UNIQUE INDEX frontend_companies_category_tag_type_stats_unique ON frontend.companies_category_tag_type_stats USING btree (store, app_category, tag_source, company_domain, type_url_slug); -- -- PostgreSQL database dump complete -- -\unrestrict 3L2ildO0psUAFfSVnAbwC2QssZ7mgrHiLXIHOzJ3H6pi6M6uMKGs7hmTAOZrnwf +\unrestrict VTvniZaod6uQ0cFaZj86PDwkDy2u9BlXtOhGKZIeoIBogW54ktvHz0gb6tfklMj diff --git a/pg-ddl/schema/frontend/companies_creative_rankings__matview.sql 
b/pg-ddl/schema/frontend/companies_creative_rankings__matview.sql index 4556d854..51e318fa 100644 --- a/pg-ddl/schema/frontend/companies_creative_rankings__matview.sql +++ b/pg-ddl/schema/frontend/companies_creative_rankings__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict fV33aRRMVQ2e3nf0MUChb5XLNT3keXw6qdL0w7Cnv5CPb7XAghKm9HAf8zoay6M +\restrict FytOn9bjeew8dS5db70eGQNx2OHAJTB6TZKPGOJqBunPNUWno5FNOEgOoDIfoQs --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -32,6 +32,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS SELECT ca.file_extension, ac_1.id AS api_call_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.creative_initial_domain_id, cr.creative_host_domain_id, cr.additional_ad_domain_ids, @@ -49,6 +50,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS cr.file_extension, cr.creative_initial_domain_id AS domain_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr UNION @@ -58,6 +60,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS cr.file_extension, cr.creative_host_domain_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr UNION @@ -67,19 +70,21 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS cr.file_extension, unnest(cr.additional_ad_domain_ids) AS unnest, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr ), visually_distinct AS ( SELECT cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, + cd.advertiser_domain_id, cd.vhash, min((cd.md5_hash)::text) AS md5_hash, max(cd.api_call_id) AS last_api_call_id, max(cd.run_at) AS last_seen FROM (combined_domains cd 
LEFT JOIN adtech.company_domain_mapping cdm ON ((cd.domain_id = cdm.domain_id))) - GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.vhash + GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.advertiser_domain_id, cd.vhash ) SELECT vd.company_id, vd.md5_hash, @@ -88,6 +93,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS saa.name AS advertiser_name, saa.store, saa.store_id AS advertiser_store_id, + adv.domain_name AS advertiser_domain_name, sap.store_id AS publisher_store_id, sap.name AS publisher_name, saa.installs, @@ -106,10 +112,11 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS WHEN (sap.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', sap.store_id, '/', sap.icon_url_100))::character varying ELSE sap.icon_url_512 END AS publisher_icon_url - FROM (((((visually_distinct vd + FROM ((((((visually_distinct vd LEFT JOIN public.api_calls ac ON ((vd.last_api_call_id = ac.id))) LEFT JOIN adtech.companies c ON ((vd.company_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) + LEFT JOIN public.domains adv ON ((vd.advertiser_domain_id = adv.id))) LEFT JOIN frontend.store_apps_overview saa ON ((vd.advertiser_store_app_id = saa.id))) LEFT JOIN frontend.store_apps_overview sap ON ((ac.store_app = sap.id))) WHERE (c.id IS NOT NULL) @@ -123,5 +130,5 @@ ALTER MATERIALIZED VIEW frontend.companies_creative_rankings OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict fV33aRRMVQ2e3nf0MUChb5XLNT3keXw6qdL0w7Cnv5CPb7XAghKm9HAf8zoay6M +\unrestrict FytOn9bjeew8dS5db70eGQNx2OHAJTB6TZKPGOJqBunPNUWno5FNOEgOoDIfoQs diff --git a/pg-ddl/schema/frontend/companies_creative_rankings_new__matview.sql b/pg-ddl/schema/frontend/companies_creative_rankings_new__matview.sql deleted file mode 100644 index 6d7eaaf8..00000000 --- a/pg-ddl/schema/frontend/companies_creative_rankings_new__matview.sql +++ /dev/null @@ -1,127 +0,0 @@ --- --- PostgreSQL 
database dump --- - -\restrict 7OzX9gsuerdxQi2LeLbrIbyNjAwRtLWmhJfpsZUsAthCNzVZTkLz2IkS8aAvNfs - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: companies_creative_rankings_new; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - -CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS - WITH creative_rankings AS ( - SELECT ca.file_extension, - ac_1.id AS api_call_id, - cr.advertiser_store_app_id, - cr.creative_initial_domain_id, - cr.creative_host_domain_id, - cr.additional_ad_domain_ids, - vcasr.run_at, - ca.md5_hash, - COALESCE(ca.phash, ca.md5_hash) AS vhash - FROM (((public.creative_records cr - LEFT JOIN public.creative_assets ca ON ((cr.creative_asset_id = ca.id))) - LEFT JOIN public.api_calls ac_1 ON ((cr.api_call_id = ac_1.id))) - LEFT JOIN public.version_code_api_scan_results vcasr ON ((ac_1.run_id = vcasr.id))) - ), combined_domains AS ( - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - cr.creative_initial_domain_id AS domain_id, - cr.advertiser_store_app_id, - cr.run_at - FROM creative_rankings cr - UNION - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - cr.creative_host_domain_id, - cr.advertiser_store_app_id, - cr.run_at - FROM creative_rankings cr - UNION - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - unnest(cr.additional_ad_domain_ids) AS unnest, - cr.advertiser_store_app_id, - cr.run_at - FROM 
creative_rankings cr - ), visually_distinct AS ( - SELECT cdm.company_id, - cd.file_extension, - cd.advertiser_store_app_id, - cd.vhash, - min((cd.md5_hash)::text) AS md5_hash, - max(cd.api_call_id) AS last_api_call_id, - max(cd.run_at) AS last_seen - FROM (combined_domains cd - LEFT JOIN adtech.company_domain_mapping cdm ON ((cd.domain_id = cdm.domain_id))) - GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.vhash - ) - SELECT vd.company_id, - vd.md5_hash, - vd.file_extension, - ad.domain_name AS company_domain, - saa.name AS advertiser_name, - saa.store, - saa.store_id AS advertiser_store_id, - sap.store_id AS publisher_store_id, - sap.name AS publisher_name, - saa.installs, - saa.rating_count, - saa.rating, - saa.installs_sum_1w, - saa.ratings_sum_1w, - saa.installs_sum_4w, - saa.ratings_sum_4w, - vd.last_seen, - CASE - WHEN (saa.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', saa.store_id, '/', saa.icon_url_100))::character varying - ELSE saa.icon_url_512 - END AS advertiser_icon_url, - CASE - WHEN (sap.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', sap.store_id, '/', sap.icon_url_100))::character varying - ELSE sap.icon_url_512 - END AS publisher_icon_url - FROM (((((visually_distinct vd - LEFT JOIN public.api_calls ac ON ((vd.last_api_call_id = ac.id))) - LEFT JOIN adtech.companies c ON ((vd.company_id = c.id))) - LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview saa ON ((vd.advertiser_store_app_id = saa.id))) - LEFT JOIN frontend.store_apps_overview sap ON ((ac.store_app = sap.id))) - WHERE (c.id IS NOT NULL) - ORDER BY vd.last_seen DESC - WITH NO DATA; - - -ALTER MATERIALIZED VIEW frontend.companies_creative_rankings_new OWNER TO postgres; - --- --- PostgreSQL database dump complete --- - -\unrestrict 7OzX9gsuerdxQi2LeLbrIbyNjAwRtLWmhJfpsZUsAthCNzVZTkLz2IkS8aAvNfs - diff --git 
a/pg-ddl/schema/frontend/companies_open_source_percent__matview.sql b/pg-ddl/schema/frontend/companies_open_source_percent__matview.sql index df8b0712..92bf5b52 100644 --- a/pg-ddl/schema/frontend/companies_open_source_percent__matview.sql +++ b/pg-ddl/schema/frontend/companies_open_source_percent__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict vTFrtQvxdLOmgfM4EUrTfUkD8XHbXjmRvattvC3KqEEWCDj8hqBAVF7C3yONzd8 +\restrict RlauLvDb30InSWbRxqbYcZjMJXoLTjVmoNxW4bbvbqyRlrZD7c1H1UGN9m1dThR --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -54,5 +54,5 @@ CREATE UNIQUE INDEX companies_open_source_percent_unique ON frontend.companies_o -- PostgreSQL database dump complete -- -\unrestrict vTFrtQvxdLOmgfM4EUrTfUkD8XHbXjmRvattvC3KqEEWCDj8hqBAVF7C3yONzd8 +\unrestrict RlauLvDb30InSWbRxqbYcZjMJXoLTjVmoNxW4bbvbqyRlrZD7c1H1UGN9m1dThR diff --git a/pg-ddl/schema/frontend/companies_parent_category_stats__matview.sql b/pg-ddl/schema/frontend/companies_parent_category_stats__matview.sql index 30b5221f..2b326bbb 100644 --- a/pg-ddl/schema/frontend/companies_parent_category_stats__matview.sql +++ b/pg-ddl/schema/frontend/companies_parent_category_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ftjQSrHjeyHq4ywlUun67CTceiztXdpytfiikfLGhf2q5NXA8XLNdf16nqiZMM3 +\restrict MJnUbKabUSAeJRkmlaaDODBQubDWemW9sdYsUpCt1BrJgeDL2jkbI75Hzj6Mhwv --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,42 +28,28 @@ SET default_table_access_method 
= heap; -- CREATE MATERIALIZED VIEW frontend.companies_parent_category_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - c.name AS company_name, - sa.installs, - sa.rating_count, - COALESCE(ad.domain_name, csac.ad_domain) AS company_domain - FROM (((adtech.combined_store_apps_companies csac + WITH distinct_apps_group AS ( + SELECT DISTINCT csac.store_app, + COALESCE(ad.domain_name, csac.ad_domain) AS company_domain, + c.name AS company_name + FROM ((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.parent_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) WHERE (csac.parent_id IN ( SELECT DISTINCT pc.id FROM (adtech.companies pc LEFT JOIN adtech.companies c_1 ON ((pc.id = c_1.parent_company_id))) WHERE (c_1.id IS NOT NULL))) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.company_domain, dag.company_name, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.company_domain, dag.company_name + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, 
dag.company_domain, dag.company_name WITH NO DATA; @@ -87,5 +73,5 @@ CREATE INDEX companies_parent_category_stats_query_idx ON frontend.companies_par -- PostgreSQL database dump complete -- -\unrestrict ftjQSrHjeyHq4ywlUun67CTceiztXdpytfiikfLGhf2q5NXA8XLNdf16nqiZMM3 +\unrestrict MJnUbKabUSAeJRkmlaaDODBQubDWemW9sdYsUpCt1BrJgeDL2jkbI75Hzj6Mhwv diff --git a/pg-ddl/schema/frontend/companies_parent_category_tag_stats__matview.sql b/pg-ddl/schema/frontend/companies_parent_category_tag_stats__matview.sql index ea99cc2b..66c9d3fb 100644 --- a/pg-ddl/schema/frontend/companies_parent_category_tag_stats__matview.sql +++ b/pg-ddl/schema/frontend/companies_parent_category_tag_stats__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict mtqaIZJL2aEl0hNEC0uRsHlqnZPCpIzmDSXU2Mi4jXGgnFRRRSucUUn1H9LVpQ5 +\restrict AfTUUjvI0MandVZJdgUNpLJRMbaAyvsS2AZVcPNZTGNNjE92YAlEDt7d4RkmTnO --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,45 +28,31 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.companies_parent_category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, + WITH distinct_apps_group AS ( + SELECT DISTINCT csac.store_app, tag.tag_source, c.name AS company_name, - sa.installs, - sa.rating_count, COALESCE(ad.domain_name, csac.ad_domain) AS company_domain - FROM 
((((adtech.combined_store_apps_companies csac + FROM (((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.parent_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) WHERE ((tag.present IS TRUE) AND (csac.parent_id IN ( SELECT DISTINCT pc.id FROM (adtech.companies pc LEFT JOIN adtech.companies c_1 ON ((pc.id = c_1.parent_company_id))) WHERE (c_1.id IS NOT NULL)))) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.tag_source, dag.company_domain, dag.company_name, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source, dag.company_domain, dag.company_name + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source, dag.company_domain, dag.company_name WITH NO DATA; @@ -90,5 +76,5 @@ CREATE INDEX companies_parent_category_tag_stats_query_idx ON frontend.companies -- PostgreSQL database dump complete -- -\unrestrict mtqaIZJL2aEl0hNEC0uRsHlqnZPCpIzmDSXU2Mi4jXGgnFRRRSucUUn1H9LVpQ5 +\unrestrict AfTUUjvI0MandVZJdgUNpLJRMbaAyvsS2AZVcPNZTGNNjE92YAlEDt7d4RkmTnO diff --git a/pg-ddl/schema/frontend/companies_sdks_overview__matview.sql b/pg-ddl/schema/frontend/companies_sdks_overview__matview.sql index 6f4e09a2..22ec9fef 100644 --- 
a/pg-ddl/schema/frontend/companies_sdks_overview__matview.sql +++ b/pg-ddl/schema/frontend/companies_sdks_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict CFXhAY3XeCC3u2R44m6gyqKWvjWZqqLPXSX9p3JKWlAxkD9qyD5b5xJuJ3fp5zV +\restrict 2O9Uh0p44bvmyrcDV7JRG8nXbgiu7pSYBFuAAPvipLdWCeM17Our0aNC8Y2Tkdh --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -58,5 +58,5 @@ CREATE UNIQUE INDEX companies_sdks_overview_unique_idx ON frontend.companies_sdk -- PostgreSQL database dump complete -- -\unrestrict CFXhAY3XeCC3u2R44m6gyqKWvjWZqqLPXSX9p3JKWlAxkD9qyD5b5xJuJ3fp5zV +\unrestrict 2O9Uh0p44bvmyrcDV7JRG8nXbgiu7pSYBFuAAPvipLdWCeM17Our0aNC8Y2Tkdh diff --git a/pg-ddl/schema/frontend/company_domain_country__matview.sql b/pg-ddl/schema/frontend/company_domain_country__matview.sql index 3f5b60bb..471992cf 100644 --- a/pg-ddl/schema/frontend/company_domain_country__matview.sql +++ b/pg-ddl/schema/frontend/company_domain_country__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict RI4SdshAECYDoXMzrPWidZHjCnkgwC21ejvKsWVh6EYiiGeAV7laGhKkPE7Eb9r +\restrict lcSrrgce69pHFL7N4esHzhVeTuzFrEgJYNhJxLRhcdhFu6VNdwlFmvcZrYa11uv --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -74,5 +74,5 @@ ALTER MATERIALIZED VIEW frontend.company_domain_country OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict RI4SdshAECYDoXMzrPWidZHjCnkgwC21ejvKsWVh6EYiiGeAV7laGhKkPE7Eb9r +\unrestrict 
lcSrrgce69pHFL7N4esHzhVeTuzFrEgJYNhJxLRhcdhFu6VNdwlFmvcZrYa11uv diff --git a/pg-ddl/schema/frontend/company_domains_top_apps__matview.sql b/pg-ddl/schema/frontend/company_domains_top_apps__matview.sql index 86f9d9bf..9e86b7ff 100644 --- a/pg-ddl/schema/frontend/company_domains_top_apps__matview.sql +++ b/pg-ddl/schema/frontend/company_domains_top_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 56mPSRUQOfwRx13jqVE4XDcleuRPqcTOSycTI8b59jmObveFPEbG8jPN2fxQThQ +\restrict p4r3O9Mo7OOo29ff3H5UuWh69mqm7Lx5kGsTwFGHS11LRc6VKAWD5rDiTk4TqTM --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -115,5 +115,5 @@ CREATE UNIQUE INDEX idx_unique_company_domains_top_apps ON frontend.company_doma -- PostgreSQL database dump complete -- -\unrestrict 56mPSRUQOfwRx13jqVE4XDcleuRPqcTOSycTI8b59jmObveFPEbG8jPN2fxQThQ +\unrestrict p4r3O9Mo7OOo29ff3H5UuWh69mqm7Lx5kGsTwFGHS11LRc6VKAWD5rDiTk4TqTM diff --git a/pg-ddl/schema/frontend/company_parent_top_apps__matview.sql b/pg-ddl/schema/frontend/company_parent_top_apps__matview.sql index feaffba5..abef44bf 100644 --- a/pg-ddl/schema/frontend/company_parent_top_apps__matview.sql +++ b/pg-ddl/schema/frontend/company_parent_top_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict M2xu9zd1KOuKxgxFwW7rzqc4C8Ux2TGYQxrgTfKebN9bKjY0xMW6gyN3JIovDW0 +\restrict B3HnAdpabUc4L14zTJPw00HOeKmGAALjGQyQyK6svgywh61W9DY8sBWFLas7fVd --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -35,6 +35,8 @@ CREATE 
MATERIALIZED VIEW frontend.company_parent_top_apps AS sa.name, sa.store_id, csapc.app_category, + sa.developer_name, + sa.icon_url_100, sa.installs_sum_4w AS installs_d30, sa.ratings_sum_4w AS rating_count_d30, csapc.sdk, @@ -50,6 +52,8 @@ CREATE MATERIALIZED VIEW frontend.company_parent_top_apps AS deduped_data.store, deduped_data.name, deduped_data.store_id, + deduped_data.developer_name, + deduped_data.icon_url_100, deduped_data.app_category, deduped_data.installs_d30, deduped_data.rating_count_d30, @@ -65,7 +69,9 @@ CREATE MATERIALIZED VIEW frontend.company_parent_top_apps AS store, name, store_id, + developer_name, app_category, + icon_url_100, installs_d30, rating_count_d30, sdk, @@ -105,5 +111,5 @@ CREATE UNIQUE INDEX idx_company_parent_top_apps_unique ON frontend.company_paren -- PostgreSQL database dump complete -- -\unrestrict M2xu9zd1KOuKxgxFwW7rzqc4C8Ux2TGYQxrgTfKebN9bKjY0xMW6gyN3JIovDW0 +\unrestrict B3HnAdpabUc4L14zTJPw00HOeKmGAALjGQyQyK6svgywh61W9DY8sBWFLas7fVd diff --git a/pg-ddl/schema/frontend/company_top_apps__matview.sql b/pg-ddl/schema/frontend/company_top_apps__matview.sql index a5613ff1..78da14f3 100644 --- a/pg-ddl/schema/frontend/company_top_apps__matview.sql +++ b/pg-ddl/schema/frontend/company_top_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ByySsc1rPZtK7fUzU3T41XNSBEAeFw9CturzegTTznuhQxXJlCc7bFsLGdRfatZ +\restrict 0R5fSxwbKyGd7YaPgW6Z0RcY9vxS6FSMZHfglsoIM3CRRhDyr2PdQTrBc6AQJOm --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -35,6 +35,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS sa.name, sa.store_id, cac.app_category, + sa.developer_name, + sa.icon_url_100, sa.installs_sum_4w AS installs_d30, sa.ratings_sum_4w AS rating_count_d30, 
cac.sdk, @@ -50,6 +52,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS deduped_data.store, deduped_data.name, deduped_data.store_id, + deduped_data.developer_name, + deduped_data.icon_url_100, deduped_data.app_category, deduped_data.installs_d30, deduped_data.rating_count_d30, @@ -65,6 +69,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS store, name, store_id, + developer_name, + icon_url_100, app_category, installs_d30, rating_count_d30, @@ -112,5 +118,5 @@ CREATE UNIQUE INDEX idx_unique_company_top_apps ON frontend.company_top_apps USI -- PostgreSQL database dump complete -- -\unrestrict ByySsc1rPZtK7fUzU3T41XNSBEAeFw9CturzegTTznuhQxXJlCc7bFsLGdRfatZ +\unrestrict 0R5fSxwbKyGd7YaPgW6Z0RcY9vxS6FSMZHfglsoIM3CRRhDyr2PdQTrBc6AQJOm diff --git a/pg-ddl/schema/frontend/keyword_scores__matview.sql b/pg-ddl/schema/frontend/keyword_scores__matview.sql index e8cfdf4e..0f89423d 100644 --- a/pg-ddl/schema/frontend/keyword_scores__matview.sql +++ b/pg-ddl/schema/frontend/keyword_scores__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict gBlVtjxh0AbxdzOemU0qCabGc4aWATtwvtsKsJuUU1o1BcKVFMSdyGly2CmKv7I +\restrict XaL7BcyRBVAt4FwL7eQKnzFlyjGyEKtl0L6eHaapHWeHMgZjkKIuAMaTlBcetgR --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -28,55 +28,121 @@ SET default_table_access_method = heap; -- CREATE MATERIALIZED VIEW frontend.keyword_scores AS - WITH latest_en_descriptions AS ( - SELECT DISTINCT ON (sad.store_app) sad.store_app, - sad.id AS description_id - FROM (public.store_apps_descriptions sad - JOIN public.description_keywords dk ON ((sad.id = dk.description_id))) - WHERE (sad.language_id = 1) - ORDER BY sad.store_app, sad.updated_at DESC - ), keyword_app_counts AS ( + WITH 
keyword_app_counts AS ( SELECT sa.store, k.keyword_text, - dk.keyword_id, - count(DISTINCT led.store_app) AS app_count - FROM (((latest_en_descriptions led - LEFT JOIN public.description_keywords dk ON ((led.description_id = dk.description_id))) - LEFT JOIN public.keywords k ON ((dk.keyword_id = k.id))) - LEFT JOIN public.store_apps sa ON ((led.store_app = sa.id))) - WHERE (dk.keyword_id IS NOT NULL) - GROUP BY sa.store, k.keyword_text, dk.keyword_id + ake.keyword_id, + count(DISTINCT ake.store_app) AS app_count, + array_length(string_to_array((k.keyword_text)::text, ' '::text), 1) AS word_count + FROM ((public.app_keywords_extracted ake + LEFT JOIN public.keywords k ON ((ake.keyword_id = k.id))) + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) + GROUP BY sa.store, k.keyword_text, ake.keyword_id ), total_app_count AS ( SELECT sa.store, - count(*) AS total_apps - FROM (latest_en_descriptions led - LEFT JOIN public.store_apps sa ON ((led.store_app = sa.id))) + count(DISTINCT ake.store_app) AS total_apps + FROM (public.app_keywords_extracted ake + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) GROUP BY sa.store + ), keyword_competitors AS ( + SELECT ake.keyword_id, + sa.store, + avg(COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25))) AS avg_installs, + max(COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25))) AS max_installs, + percentile_cont((0.5)::double precision) WITHIN GROUP (ORDER BY ((COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25)))::double precision)) AS median_installs, + avg(agml.rating) AS avg_rating, + count(*) FILTER (WHERE (COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25)) > 1000000)) AS apps_over_1m_installs, + count(*) FILTER (WHERE ((sa.name)::text ~~* (('%'::text || (k.keyword_text)::text) || '%'::text))) AS title_matches + FROM (((public.app_keywords_extracted ake + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) + LEFT JOIN public.app_global_metrics_latest agml ON 
((sa.id = agml.store_app))) + LEFT JOIN public.keywords k ON ((ake.keyword_id = k.id))) + GROUP BY ake.keyword_id, sa.store, k.keyword_text + ), keyword_metrics AS ( + SELECT kac.store, + kac.keyword_text, + kac.keyword_id, + kac.app_count, + round(kc.avg_installs, 0) AS avg_installs, + tac.total_apps, + round(((100.0 * (kac.app_count)::numeric) / (NULLIF(tac.total_apps, 0))::numeric), 2) AS market_penetration_pct, + round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2) AS competitiveness_score, + kac.word_count, + CASE + WHEN (kac.word_count = 1) THEN 'short_tail'::text + WHEN (kac.word_count = 2) THEN 'medium_tail'::text + ELSE 'long_tail'::text + END AS keyword_type, + length((kac.keyword_text)::text) AS char_length, + (COALESCE(kc.avg_installs, (0)::numeric))::bigint AS avg_competitor_installs, + COALESCE(kc.max_installs, (0)::bigint) AS top_competitor_installs, + (COALESCE(kc.median_installs, (0)::double precision))::bigint AS median_competitor_installs, + COALESCE(kc.avg_rating, (0)::double precision) AS avg_competitor_rating, + COALESCE(kc.apps_over_1m_installs, (0)::bigint) AS major_competitors, + COALESCE(kc.title_matches, (0)::bigint) AS title_matches, + round(((100.0 * (COALESCE(kc.title_matches, (0)::bigint))::numeric) / (NULLIF(kac.app_count, 0))::numeric), 2) AS title_relevance_pct + FROM ((keyword_app_counts kac + LEFT JOIN total_app_count tac ON ((kac.store = tac.store))) + LEFT JOIN keyword_competitors kc ON (((kac.keyword_id = kc.keyword_id) AND (kac.store = kc.store)))) ) - SELECT kac.store, - kac.keyword_text, - kac.keyword_id, - kac.app_count, - tac.total_apps, - round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2) AS competitiveness_score - FROM (keyword_app_counts kac - 
LEFT JOIN total_app_count tac ON ((kac.store = tac.store))) - ORDER BY (round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2)) DESC + SELECT store, + keyword_text, + keyword_id, + app_count, + avg_installs, + total_apps, + market_penetration_pct, + competitiveness_score, + word_count, + keyword_type, + char_length, + avg_competitor_installs, + top_competitor_installs, + median_competitor_installs, + avg_competitor_rating, + major_competitors, + title_matches, + title_relevance_pct, + round(LEAST((100)::numeric, ((((app_count)::numeric * 10.0) * ((100)::numeric - competitiveness_score)) / 100.0)), 2) AS volume_competition_score, + round(LEAST((100)::numeric, ((competitiveness_score * 0.6) + (LEAST((100)::numeric, ((COALESCE(avg_competitor_installs, (0)::bigint))::numeric / 100000.0)) * 0.4))), 2) AS keyword_difficulty, + round( + CASE + WHEN (app_count < 10) THEN (0)::double precision + WHEN ((major_competitors)::numeric > ((app_count)::numeric * 0.25)) THEN (20)::double precision + ELSE ((LEAST((40)::double precision, (log(((app_count + 1))::double precision) * (10)::double precision)) + ((((100)::numeric - competitiveness_score) * 0.4))::double precision) + ( + CASE + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 100000) THEN 20 + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 1000000) THEN 15 + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 10000000) THEN 10 + ELSE 5 + END)::double precision) + END) AS opportunity_score, + CASE + WHEN (app_count > 0) THEN round(((((app_count)::numeric * 1000.0) * (1.0 / ((1)::numeric + (competitiveness_score / 50.0)))) * + CASE + WHEN (word_count = 1) THEN 2.0 + WHEN (word_count = 2) THEN 1.0 + ELSE 0.5 + END), 0) + ELSE (0)::numeric + END AS estimated_monthly_searches, + round(((100)::numeric - LEAST((100)::numeric, ((((major_competitors)::numeric * 10.0) + 
((COALESCE(median_competitor_installs, (0)::bigint))::numeric / 100000.0)) + (competitiveness_score * 0.3)))), 2) AS ranking_feasibility + FROM keyword_metrics km WITH NO DATA; ALTER MATERIALIZED VIEW frontend.keyword_scores OWNER TO postgres; -- --- Name: keyword_scores_unique; Type: INDEX; Schema: frontend; Owner: postgres +-- Name: keyword_scores_store_keyword_id_idx; Type: INDEX; Schema: frontend; Owner: postgres -- -CREATE UNIQUE INDEX keyword_scores_unique ON frontend.keyword_scores USING btree (store, keyword_id); +CREATE UNIQUE INDEX keyword_scores_store_keyword_id_idx ON frontend.keyword_scores USING btree (store, keyword_id); -- -- PostgreSQL database dump complete -- -\unrestrict gBlVtjxh0AbxdzOemU0qCabGc4aWATtwvtsKsJuUU1o1BcKVFMSdyGly2CmKv7I +\unrestrict XaL7BcyRBVAt4FwL7eQKnzFlyjGyEKtl0L6eHaapHWeHMgZjkKIuAMaTlBcetgR diff --git a/pg-ddl/schema/frontend/latest_sdk_scanned_apps__matview.sql b/pg-ddl/schema/frontend/latest_sdk_scanned_apps__matview.sql index ad8abfe2..613bdfb6 100644 --- a/pg-ddl/schema/frontend/latest_sdk_scanned_apps__matview.sql +++ b/pg-ddl/schema/frontend/latest_sdk_scanned_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict hk8MwGFkw2lYCHmDj9DkZI7XpvxjZqLBw0fjoBeX44xqTSEr5I3yCMFxZnUMQWi +\restrict oYNu5XFcbte5BrBHJY164jLfbrAbcb0VKVsux85IfQMxzitTlxFQsomFxf6kuVw --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -77,5 +77,5 @@ CREATE UNIQUE INDEX latest_sdk_scanned_apps_unique_index ON frontend.latest_sdk_ -- PostgreSQL database dump complete -- -\unrestrict hk8MwGFkw2lYCHmDj9DkZI7XpvxjZqLBw0fjoBeX44xqTSEr5I3yCMFxZnUMQWi +\unrestrict oYNu5XFcbte5BrBHJY164jLfbrAbcb0VKVsux85IfQMxzitTlxFQsomFxf6kuVw diff --git 
a/pg-ddl/schema/frontend/mediation_adapter_app_counts__matview.sql b/pg-ddl/schema/frontend/mediation_adapter_app_counts__matview.sql new file mode 100644 index 00000000..e00260cc --- /dev/null +++ b/pg-ddl/schema/frontend/mediation_adapter_app_counts__matview.sql @@ -0,0 +1,82 @@ +-- +-- PostgreSQL database dump +-- + +\restrict TDUizv9uLjVfo1PRzoiFfKgmEgnjGICx2niwkiBWUB3V9kFla9nISbBaZnefzj1 + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: mediation_adapter_app_counts; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.mediation_adapter_app_counts AS + WITH filter_mediation_strings AS ( + SELECT vs.id AS string_id, + sd.company_id AS mediation_company_id, + vs.value_name AS full_sdk, + regexp_replace(regexp_replace(vs.value_name, concat(cmp.mediation_pattern, '.'), ''::text), '\..*$'::text, ''::text) AS adapter_string + FROM ((public.version_strings vs + JOIN adtech.sdk_mediation_patterns cmp ON ((lower(vs.value_name) ~~ (lower(concat((cmp.mediation_pattern)::text, '.')) || '%'::text)))) + JOIN adtech.sdks sd ON ((cmp.sdk_id = sd.id))) + ), mediation_strings AS ( + SELECT fms.string_id, + fms.mediation_company_id, + cma.company_id AS adapter_company_id, + fms.adapter_string, + fms.full_sdk + FROM (filter_mediation_strings fms + LEFT JOIN adtech.company_mediation_adapters cma ON ((lower(fms.adapter_string) ~~ (lower((cma.adapter_pattern)::text) || '%'::text)))) + WHERE 
(fms.mediation_company_id <> cma.company_id) + ), app_counts AS ( + SELECT ms.mediation_company_id, + ms.adapter_string, + ms.adapter_company_id, + cm.mapped_category AS app_category, + count(DISTINCT sass.store_app) AS app_count + FROM (((adtech.store_app_sdk_strings sass + JOIN mediation_strings ms ON ((sass.version_string_id = ms.string_id))) + LEFT JOIN public.store_apps sa ON ((sass.store_app = sa.id))) + LEFT JOIN public.category_mapping cm ON (((sa.category)::text = (cm.original_category)::text))) + GROUP BY ms.mediation_company_id, ms.adapter_string, ms.adapter_company_id, cm.mapped_category + ) + SELECT md.domain_name AS mediation_domain, + ac.adapter_string, + ad.domain_name AS adapter_domain, + adc.name AS adapter_company_name, + adc.logo_url AS adapter_logo_url, + ac.app_category, + ac.app_count + FROM ((((app_counts ac + LEFT JOIN adtech.companies mdc ON ((ac.mediation_company_id = mdc.id))) + LEFT JOIN public.domains md ON ((mdc.domain_id = md.id))) + LEFT JOIN adtech.companies adc ON ((ac.adapter_company_id = adc.id))) + LEFT JOIN public.domains ad ON ((adc.domain_id = ad.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.mediation_adapter_app_counts OWNER TO postgres; + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict TDUizv9uLjVfo1PRzoiFfKgmEgnjGICx2niwkiBWUB3V9kFla9nISbBaZnefzj1 + diff --git a/pg-ddl/schema/frontend/store_app_api_companies__matview.sql b/pg-ddl/schema/frontend/store_app_api_companies__matview.sql index 8afcb008..1c0f7eb6 100644 --- a/pg-ddl/schema/frontend/store_app_api_companies__matview.sql +++ b/pg-ddl/schema/frontend/store_app_api_companies__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict BNbTlPX4bIJmdxtb6J2SbiGtahQ9KeYJVDdpUl3Kqr4L648srxtECMvBeb2oQuw +\restrict pTHm5Jkq0xxxSXid0ImieoUODlNukVUhTomost6Ewx68mEqjCUVCxTeskafUu9a --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database 
version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -61,5 +61,5 @@ ALTER MATERIALIZED VIEW frontend.store_app_api_companies OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict BNbTlPX4bIJmdxtb6J2SbiGtahQ9KeYJVDdpUl3Kqr4L648srxtECMvBeb2oQuw +\unrestrict pTHm5Jkq0xxxSXid0ImieoUODlNukVUhTomost6Ewx68mEqjCUVCxTeskafUu9a diff --git a/pg-ddl/schema/frontend/store_app_ranks_best_monthly__matview.sql b/pg-ddl/schema/frontend/store_app_ranks_best_monthly__matview.sql index 160239fd..8823f393 100644 --- a/pg-ddl/schema/frontend/store_app_ranks_best_monthly__matview.sql +++ b/pg-ddl/schema/frontend/store_app_ranks_best_monthly__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict nQgANLnruuGTJND1mnoH3XlQEjA6FOwcnPYBaZt2TK8pQlejoo8d2aiWswhoGJE +\restrict vHeJBy0gdy99hhHyYuZucy8gS8MjvuClce7ZxnYPleXEJSnkCdseLDnAOGERZHC --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -71,5 +71,5 @@ CREATE UNIQUE INDEX store_app_ranks_best_monthly_uidx ON frontend.store_app_rank -- PostgreSQL database dump complete -- -\unrestrict nQgANLnruuGTJND1mnoH3XlQEjA6FOwcnPYBaZt2TK8pQlejoo8d2aiWswhoGJE +\unrestrict vHeJBy0gdy99hhHyYuZucy8gS8MjvuClce7ZxnYPleXEJSnkCdseLDnAOGERZHC diff --git a/pg-ddl/schema/frontend/store_app_ranks_daily.sql b/pg-ddl/schema/frontend/store_app_ranks_daily.sql index dcbb2f6d..8a43572b 100644 --- a/pg-ddl/schema/frontend/store_app_ranks_daily.sql +++ b/pg-ddl/schema/frontend/store_app_ranks_daily.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict fWjyF6I9VKp4hOOteeXx9yfvmmwmKBP8Gjc4f2Ay3de5vxu2odqrcZ5KatchEb1 +\restrict 
g1d9NjqqokgtgxIQDVOGJCrlNf7G1lxfhVN4u6NxahRgQ2gyebeJxOz23hNWUNR --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -91,5 +91,5 @@ ALTER TABLE ONLY frontend.store_app_ranks_daily -- PostgreSQL database dump complete -- -\unrestrict fWjyF6I9VKp4hOOteeXx9yfvmmwmKBP8Gjc4f2Ay3de5vxu2odqrcZ5KatchEb1 +\unrestrict g1d9NjqqokgtgxIQDVOGJCrlNf7G1lxfhVN4u6NxahRgQ2gyebeJxOz23hNWUNR diff --git a/pg-ddl/schema/frontend/store_app_ranks_latest__matview.sql b/pg-ddl/schema/frontend/store_app_ranks_latest__matview.sql index fe35c99d..d36d64c1 100644 --- a/pg-ddl/schema/frontend/store_app_ranks_latest__matview.sql +++ b/pg-ddl/schema/frontend/store_app_ranks_latest__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict QGNvzDgl5pzqMoectg8ijXLNs1beGeYGrjuslkLq9mqgHkhFtjgcbouZHILVdmh +\restrict 61L69Gi90FrgcfbVERiZF9GRRShqFKQRqJpAgNhgcu055vWAR1e2TATO299Ouy9 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -39,6 +39,7 @@ CREATE MATERIALIZED VIEW frontend.store_app_ranks_latest AS sa.name, sa.store_id, sa.store, + sa.developer_name, sa.installs, sa.rating_count, sa.rating, @@ -72,5 +73,5 @@ CREATE UNIQUE INDEX idx_store_app_ranks_latest_filter_sort ON frontend.store_app -- PostgreSQL database dump complete -- -\unrestrict QGNvzDgl5pzqMoectg8ijXLNs1beGeYGrjuslkLq9mqgHkhFtjgcbouZHILVdmh +\unrestrict 61L69Gi90FrgcfbVERiZF9GRRShqFKQRqJpAgNhgcu055vWAR1e2TATO299Ouy9 diff --git a/pg-ddl/schema/frontend/store_app_ranks_weekly.sql 
b/pg-ddl/schema/frontend/store_app_ranks_weekly.sql index 23ea2338..c6e23f2f 100644 --- a/pg-ddl/schema/frontend/store_app_ranks_weekly.sql +++ b/pg-ddl/schema/frontend/store_app_ranks_weekly.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict h183fGBzdzkyFZFgvvje69uVbAGBzoQQj595d8h3GVC0iWEDs0CpaX7QxWUqM93 +\restrict hfbIAaZ7KS6Hpa2p4dH9nwiqfYE0HuOdlW9mg2IobJs3f8NW7OKgB5UAB5cywQh --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -98,5 +98,5 @@ ALTER TABLE ONLY frontend.store_app_ranks_weekly -- PostgreSQL database dump complete -- -\unrestrict h183fGBzdzkyFZFgvvje69uVbAGBzoQQj595d8h3GVC0iWEDs0CpaX7QxWUqM93 +\unrestrict hfbIAaZ7KS6Hpa2p4dH9nwiqfYE0HuOdlW9mg2IobJs3f8NW7OKgB5UAB5cywQh diff --git a/pg-ddl/schema/frontend/store_apps_overview__matview.sql b/pg-ddl/schema/frontend/store_apps_overview__matview.sql index 61e90b61..4e2679d7 100644 --- a/pg-ddl/schema/frontend/store_apps_overview__matview.sql +++ b/pg-ddl/schema/frontend/store_apps_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Rbxh9jMb42s8eRdhP1dxUOhDfqbnOVDlOzhYdoIsJWn0o17BNd6s9Qy89gde2G3 +\restrict xEmarDhA1mWQ3a9Mri1Ib0VdLmte29IqrhQOgAYZtlK2acqaMw4jTVkOKnKxrxs --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -134,6 +134,7 @@ CREATE MATERIALIZED VIEW frontend.store_apps_overview AS sa.tablet_image_url_1, sa.tablet_image_url_2, sa.tablet_image_url_3, + to_tsvector('simple'::regconfig, (((((COALESCE(sa.name, ''::character varying))::text || ' '::text) 
|| (COALESCE(sa.store_id, ''::character varying))::text) || ' '::text) || (COALESCE(d.name, ''::character varying))::text)) AS textsearchable, d.developer_id, d.name AS developer_name, pd.domain_name AS developer_url, @@ -149,7 +150,9 @@ CREATE MATERIALIZED VIEW frontend.store_apps_overview AS lac.run_result, lsac.run_at AS api_successful_last_crawled, acr.ad_creative_count, - amc.ad_mon_creatives + amc.ad_mon_creatives, + GREATEST(COALESCE(am.installs, (0)::bigint), (COALESCE(am.rating_count, (0)::bigint) * 50)) AS installs_est, + GREATEST(COALESCE(saz.installs_sum_4w, (0)::numeric), (COALESCE(saz.ratings_sum_4w, (0)::numeric) * (50)::numeric)) AS installs_sum_4w_est FROM ((((((((((((((((public.store_apps sa LEFT JOIN public.category_mapping cm ON (((sa.category)::text = (cm.original_category)::text))) LEFT JOIN public.developers d ON ((sa.developer = d.id))) @@ -172,6 +175,34 @@ CREATE MATERIALIZED VIEW frontend.store_apps_overview AS ALTER MATERIALIZED VIEW frontend.store_apps_overview OWNER TO postgres; +-- +-- Name: store_apps_overview_installs_est_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_installs_est_idx ON frontend.store_apps_overview USING btree (installs_est DESC); + + +-- +-- Name: store_apps_overview_installs_sum_4w_est_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_installs_sum_4w_est_idx ON frontend.store_apps_overview USING btree (installs_sum_4w_est DESC); + + +-- +-- Name: store_apps_overview_store_last_updated_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_store_last_updated_idx ON frontend.store_apps_overview USING btree (store_last_updated); + + +-- +-- Name: store_apps_overview_textsearch_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_textsearch_idx ON frontend.store_apps_overview USING gin (textsearchable); + + -- -- Name: store_apps_overview_unique_idx; Type: 
INDEX; Schema: frontend; Owner: postgres -- @@ -197,5 +228,5 @@ CREATE UNIQUE INDEX store_apps_overview_unique_store_id_idx ON frontend.store_ap -- PostgreSQL database dump complete -- -\unrestrict Rbxh9jMb42s8eRdhP1dxUOhDfqbnOVDlOzhYdoIsJWn0o17BNd6s9Qy89gde2G3 +\unrestrict xEmarDhA1mWQ3a9Mri1Ib0VdLmte29IqrhQOgAYZtlK2acqaMw4jTVkOKnKxrxs diff --git a/pg-ddl/schema/frontend/total_categories_app_counts__matview.sql b/pg-ddl/schema/frontend/total_categories_app_counts__matview.sql deleted file mode 100644 index 468323d2..00000000 --- a/pg-ddl/schema/frontend/total_categories_app_counts__matview.sql +++ /dev/null @@ -1,57 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict Qfev810d5gN5bcwDBKDUi40qcsssVGq5bnxzNNzCUOvLHrzunMXrje4npQICE49 - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: total_categories_app_counts; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - -CREATE MATERIALIZED VIEW frontend.total_categories_app_counts AS - SELECT sa.store, - tag.tag_source, - csac.app_category, - count(DISTINCT csac.store_app) AS app_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN public.store_apps sa ON ((csac.store_app = sa.id))) - CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) - WHERE (tag.present IS TRUE) - GROUP BY sa.store, 
tag.tag_source, csac.app_category - WITH NO DATA; - - -ALTER MATERIALIZED VIEW frontend.total_categories_app_counts OWNER TO postgres; - --- --- Name: idx_total_categories_app_counts; Type: INDEX; Schema: frontend; Owner: postgres --- - -CREATE UNIQUE INDEX idx_total_categories_app_counts ON frontend.total_categories_app_counts USING btree (store, tag_source, app_category); - - --- --- PostgreSQL database dump complete --- - -\unrestrict Qfev810d5gN5bcwDBKDUi40qcsssVGq5bnxzNNzCUOvLHrzunMXrje4npQICE49 - diff --git a/pg-ddl/schema/frontend/store_apps_z_scores__matview.sql b/pg-ddl/schema/frontend/z_scores_top_apps__matview.sql similarity index 66% rename from pg-ddl/schema/frontend/store_apps_z_scores__matview.sql rename to pg-ddl/schema/frontend/z_scores_top_apps__matview.sql index f7cfe914..7491bf26 100644 --- a/pg-ddl/schema/frontend/store_apps_z_scores__matview.sql +++ b/pg-ddl/schema/frontend/z_scores_top_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict y4jeAoEcTb1EkSfj93x1SdYMtzLyeh0LZtGqXiIEfAi6iamlX2FLPgbuG3IRwBp +\restrict IM5HO4aX6yUwJJcunxx3jPQI5JKCX0kL0e8TbJWnhf5UAwe61CR6VNiOqESrnkv --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -24,10 +24,10 @@ SET default_tablespace = ''; SET default_table_access_method = heap; -- --- Name: store_apps_z_scores; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- Name: z_scores_top_apps; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres -- -CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS +CREATE MATERIALIZED VIEW frontend.z_scores_top_apps AS WITH app_metrics AS ( SELECT app_global_metrics_latest.store_app, app_global_metrics_latest.rating, @@ -43,23 +43,22 @@ CREATE MATERIALIZED VIEW 
frontend.store_apps_z_scores AS saz.ratings_avg_2w, saz.installs_z_score_2w, saz.ratings_z_score_2w, - saz.installs_sum_4w, + sa.installs_sum_4w_est AS installs_sum_4w, saz.ratings_sum_4w, saz.installs_avg_4w, saz.ratings_avg_4w, saz.installs_z_score_4w, saz.ratings_z_score_4w, sa.id, - sa.developer, + sa.developer_id, + sa.developer_name, sa.name, sa.store_id, sa.store, - sa.category, - am.installs, + sa.category AS app_category, + sa.installs_est AS installs, sa.free, - sa.price, sa.store_last_updated, - sa.content_rating, sa.ad_supported, sa.in_app_purchases, sa.created_at, @@ -68,9 +67,7 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS sa.release_date, am.rating_count, sa.icon_url_100, - cm.original_category, - cm.mapped_category, - row_number() OVER (PARTITION BY sa.store, cm.mapped_category, + row_number() OVER (PARTITION BY sa.store, sa.category, CASE WHEN (sa.store = 2) THEN 'rating'::text ELSE 'installs'::text @@ -80,15 +77,16 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS WHEN (sa.store = 1) THEN saz.installs_z_score_2w ELSE NULL::numeric END DESC NULLS LAST) AS rn - FROM (((public.store_app_z_scores saz - LEFT JOIN public.store_apps sa ON ((saz.store_app = sa.id))) + FROM ((public.store_app_z_scores saz + LEFT JOIN frontend.store_apps_overview sa ON ((saz.store_app = sa.id))) LEFT JOIN app_metrics am ON ((saz.store_app = am.store_app))) - LEFT JOIN public.category_mapping cm ON (((sa.category)::text = (cm.original_category)::text))) + WHERE (sa.store = ANY (ARRAY[1, 2])) ) SELECT store, store_id, name AS app_name, - mapped_category AS app_category, + developer_name, + app_category, in_app_purchases, ad_supported, icon_url_100, @@ -111,18 +109,18 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS WITH NO DATA; -ALTER MATERIALIZED VIEW frontend.store_apps_z_scores OWNER TO postgres; +ALTER MATERIALIZED VIEW frontend.z_scores_top_apps OWNER TO postgres; -- --- Name: frontend_store_apps_z_scores_unique; Type: INDEX; Schema: 
frontend; Owner: postgres +-- Name: frontend_z_scores_top_apps_unique; Type: INDEX; Schema: frontend; Owner: postgres -- -CREATE UNIQUE INDEX frontend_store_apps_z_scores_unique ON frontend.store_apps_z_scores USING btree (store, store_id); +CREATE UNIQUE INDEX frontend_z_scores_top_apps_unique ON frontend.z_scores_top_apps USING btree (store, store_id); -- -- PostgreSQL database dump complete -- -\unrestrict y4jeAoEcTb1EkSfj93x1SdYMtzLyeh0LZtGqXiIEfAi6iamlX2FLPgbuG3IRwBp +\unrestrict IM5HO4aX6yUwJJcunxx3jPQI5JKCX0kL0e8TbJWnhf5UAwe61CR6VNiOqESrnkv diff --git a/pg-ddl/schema/full_db_dump.sql b/pg-ddl/schema/full_db_dump.sql index 9d8c822f..618fd72d 100644 --- a/pg-ddl/schema/full_db_dump.sql +++ b/pg-ddl/schema/full_db_dump.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict vYe6z6bKpKdnNtZ6GXBJHvdhjuVYdAYwdbpsJfGPACUaXkI2Lmc94ZXYOPiUHhG +\restrict aqSyUPuJghUSivRpPmJ0wjpiuNqZ0Xb7iDHpQcCoga4fxvxnfEJs5GfAimQHJSS --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -462,6 +462,18 @@ CREATE TABLE adtech.company_domain_mapping ( ALTER TABLE adtech.company_domain_mapping OWNER TO postgres; +-- +-- Name: sdk_mediation_patterns; Type: TABLE; Schema: adtech; Owner: postgres +-- + +CREATE TABLE adtech.sdk_mediation_patterns ( + sdk_id integer NOT NULL, + mediation_pattern character varying(255) NOT NULL +); + + +ALTER TABLE adtech.sdk_mediation_patterns OWNER TO postgres; + -- -- Name: sdk_packages; Type: TABLE; Schema: adtech; Owner: postgres -- @@ -488,25 +500,6 @@ CREATE TABLE adtech.sdk_paths ( ALTER TABLE adtech.sdk_paths OWNER TO postgres; --- --- Name: sdks; Type: TABLE; Schema: adtech; Owner: postgres --- - -CREATE TABLE adtech.sdks ( - id integer NOT NULL, - company_id integer NOT NULL, - sdk_name 
character varying(255) NOT NULL, - sdk_url character varying(255), - is_open_source boolean DEFAULT false, - has_third_party_tracking boolean DEFAULT true, - license_type character varying(50) DEFAULT 'Commercial'::character varying, - created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, - sdk_slug text -); - - -ALTER TABLE adtech.sdks OWNER TO postgres; - -- -- Name: version_strings; Type: TABLE; Schema: public; Owner: postgres -- @@ -522,36 +515,64 @@ CREATE TABLE public.version_strings ( ALTER TABLE public.version_strings OWNER TO postgres; -- --- Name: company_sdk_strings; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- Name: sdk_strings; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres -- -CREATE MATERIALIZED VIEW adtech.company_sdk_strings AS +CREATE MATERIALIZED VIEW adtech.sdk_strings AS WITH matched_value_patterns AS ( SELECT DISTINCT lower(vd.value_name) AS value_name_lower, - sd.company_id - FROM ((public.version_strings vd + sp.sdk_id + FROM (public.version_strings vd JOIN adtech.sdk_packages sp ON ((lower(vd.value_name) ~~ (lower((sp.package_pattern)::text) || '%'::text)))) - JOIN adtech.sdks sd ON ((sp.sdk_id = sd.id))) ), matched_path_patterns AS ( SELECT DISTINCT lower(vd.xml_path) AS xml_path_lower, - sd.company_id - FROM ((public.version_strings vd + ptm.sdk_id + FROM (public.version_strings vd JOIN adtech.sdk_paths ptm ON ((lower(vd.xml_path) = lower((ptm.path_pattern)::text)))) - JOIN adtech.sdks sd ON ((ptm.sdk_id = sd.id))) + ), mediation_strings AS ( + SELECT vs.id AS version_string_id, + cmp.sdk_id, + lower(vs.value_name) AS value_name_lower + FROM (public.version_strings vs + JOIN adtech.sdk_mediation_patterns cmp ON ((lower(vs.value_name) ~~ (lower(concat((cmp.mediation_pattern)::text, '.')) || '%'::text)))) ) SELECT vs.id AS version_string_id, - mp.company_id + mp.sdk_id FROM (matched_value_patterns mp JOIN public.version_strings vs ON ((lower(vs.value_name) = mp.value_name_lower))) UNION SELECT vs.id AS 
version_string_id, - mp.company_id + mp.sdk_id FROM (matched_path_patterns mp JOIN public.version_strings vs ON ((lower(vs.xml_path) = mp.xml_path_lower))) +UNION + SELECT vs.id AS version_string_id, + ms.sdk_id + FROM (mediation_strings ms + JOIN public.version_strings vs ON ((lower(vs.value_name) = ms.value_name_lower))) WITH NO DATA; -ALTER MATERIALIZED VIEW adtech.company_sdk_strings OWNER TO postgres; +ALTER MATERIALIZED VIEW adtech.sdk_strings OWNER TO postgres; + +-- +-- Name: sdks; Type: TABLE; Schema: adtech; Owner: postgres +-- + +CREATE TABLE adtech.sdks ( + id integer NOT NULL, + company_id integer NOT NULL, + sdk_name character varying(255) NOT NULL, + sdk_url character varying(255), + is_open_source boolean DEFAULT false, + has_third_party_tracking boolean DEFAULT true, + license_type character varying(50) DEFAULT 'Commercial'::character varying, + created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP, + sdk_slug text +); + + +ALTER TABLE adtech.sdks OWNER TO postgres; -- -- Name: version_code_sdk_scan_results; Type: TABLE; Schema: public; Owner: postgres @@ -615,10 +636,10 @@ CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings AS ) SELECT vc.store_app, vdm.string_id AS version_string_id, - css.company_id + css.sdk_id FROM ((latest_version_codes vc JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) - LEFT JOIN adtech.company_sdk_strings css ON ((vdm.string_id = css.version_string_id))) + LEFT JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) WITH NO DATA; @@ -844,6 +865,8 @@ CREATE TABLE public.creative_records ( additional_ad_domain_ids integer[], created_at timestamp with time zone DEFAULT timezone('utc'::text, now()) NOT NULL, updated_at timestamp with time zone DEFAULT timezone('utc'::text, now()) NOT NULL, + click_ids integer[], + click_url_ids integer[], CONSTRAINT check_advertiser_or_advertiser_domain CHECK (((advertiser_store_app_id IS NOT NULL) OR (advertiser_domain_id IS NOT NULL) OR 
((advertiser_store_app_id IS NULL) AND (advertiser_domain_id IS NULL)))) ); @@ -1152,6 +1175,7 @@ CREATE MATERIALIZED VIEW frontend.store_apps_overview AS sa.tablet_image_url_1, sa.tablet_image_url_2, sa.tablet_image_url_3, + to_tsvector('simple'::regconfig, (((((COALESCE(sa.name, ''::character varying))::text || ' '::text) || (COALESCE(sa.store_id, ''::character varying))::text) || ' '::text) || (COALESCE(d.name, ''::character varying))::text)) AS textsearchable, d.developer_id, d.name AS developer_name, pd.domain_name AS developer_url, @@ -1167,7 +1191,9 @@ CREATE MATERIALIZED VIEW frontend.store_apps_overview AS lac.run_result, lsac.run_at AS api_successful_last_crawled, acr.ad_creative_count, - amc.ad_mon_creatives + amc.ad_mon_creatives, + GREATEST(COALESCE(am.installs, (0)::bigint), (COALESCE(am.rating_count, (0)::bigint) * 50)) AS installs_est, + GREATEST(COALESCE(saz.installs_sum_4w, (0)::numeric), (COALESCE(saz.ratings_sum_4w, (0)::numeric) * (50)::numeric)) AS installs_sum_4w_est FROM ((((((((((((((((public.store_apps sa LEFT JOIN public.category_mapping cm ON (((sa.category)::text = (cm.original_category)::text))) LEFT JOIN public.developers d ON ((sa.developer = d.id))) @@ -1255,16 +1281,17 @@ CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies AS LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) ), sdk_based_companies AS ( - SELECT DISTINCT sac.store_app, + SELECT DISTINCT sasd.store_app, cm.mapped_category AS app_category, sac.company_id, ad_1.domain_name AS ad_domain, 'sdk'::text AS tag_source, COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id - FROM ((((adtech.store_app_sdk_strings sac + FROM (((((adtech.store_app_sdk_strings sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) - LEFT JOIN 
public.store_apps sa_1 ON ((sac.store_app = sa_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) ), distinct_ad_and_pub_domains AS ( SELECT DISTINCT pd.domain_name AS publisher_domain_url, @@ -1353,6 +1380,256 @@ CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies AS ALTER MATERIALIZED VIEW adtech.combined_store_apps_companies OWNER TO postgres; +-- +-- Name: store_app_sdk_strings_2025_h1; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h1 AS + WITH latest_version_codes AS ( + SELECT DISTINCT ON (vc_1.store_app) vc_1.id, + vc_1.store_app, + vc_1.version_code, + vc_1.updated_at, + vc_1.crawl_result + FROM (public.version_codes vc_1 + JOIN public.version_code_sdk_scan_results vcssr ON ((vc_1.id = vcssr.version_code_id))) + WHERE ((vcssr.scan_result = 1) AND (vc_1.updated_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (vc_1.updated_at < '2025-07-01 00:00:00'::timestamp without time zone)) + ORDER BY vc_1.store_app, (string_to_array((vc_1.version_code)::text, '.'::text))::bigint[] DESC + ) + SELECT vc.store_app, + vdm.string_id AS version_string_id, + sd.id AS sdk_id + FROM (((latest_version_codes vc + JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) + JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) + JOIN adtech.sdks sd ON ((css.sdk_id = sd.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h1 OWNER TO postgres; + +-- +-- Name: combined_store_apps_companies_2025_h1; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h1 AS + WITH api_based_companies AS ( + SELECT DISTINCT saac.store_app, + cm.mapped_category AS app_category, + cdm.company_id, + c_1.parent_company_id AS parent_id, + 
'api_call'::text AS tag_source, + COALESCE(cad_1.domain_name, (saac.tld_url)::character varying) AS ad_domain + FROM ((((((public.api_calls saac + LEFT JOIN public.store_apps sa_1 ON ((saac.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + LEFT JOIN public.domains ad_1 ON ((saac.tld_url = (ad_1.domain_name)::text))) + LEFT JOIN adtech.company_domain_mapping cdm ON ((ad_1.id = cdm.domain_id))) + LEFT JOIN adtech.companies c_1 ON ((cdm.company_id = c_1.id))) + LEFT JOIN public.domains cad_1 ON ((c_1.domain_id = cad_1.id))) + WHERE ((saac.called_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (saac.called_at < '2025-07-01 00:00:00'::timestamp without time zone)) + ), developer_based_companies AS ( + SELECT DISTINCT sa_1.id AS store_app, + cm.mapped_category AS app_category, + cd.company_id, + d.domain_name AS ad_domain, + 'developer'::text AS tag_source, + COALESCE(c_1.parent_company_id, cd.company_id) AS parent_id + FROM ((((adtech.company_developers cd + LEFT JOIN public.store_apps sa_1 ON ((cd.developer_id = sa_1.developer))) + LEFT JOIN adtech.companies c_1 ON ((cd.company_id = c_1.id))) + LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), sdk_based_companies AS ( + SELECT DISTINCT sasd.store_app, + cm.mapped_category AS app_category, + sac.company_id, + ad_1.domain_name AS ad_domain, + 'sdk'::text AS tag_source, + COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id + FROM (((((adtech.store_app_sdk_strings_2025_h1 sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) + LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) + LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = 
(cm.original_category)::text))) + ), distinct_ad_and_pub_domains AS ( + SELECT DISTINCT pd.domain_name AS publisher_domain_url, + ad_1.domain_name AS ad_domain_url, + aae.relationship + FROM ((((public.app_ads_entrys aae + LEFT JOIN public.domains ad_1 ON ((aae.ad_domain = ad_1.id))) + LEFT JOIN public.app_ads_map aam ON ((aae.id = aam.app_ads_entry))) + LEFT JOIN public.domains pd ON ((aam.pub_domain = pd.id))) + LEFT JOIN public.adstxt_crawl_results pdcr ON ((pd.id = pdcr.domain_id))) + WHERE ((pdcr.crawled_at - aam.updated_at) < '01:00:00'::interval) + ), combined_sources AS ( + SELECT api_based_companies.store_app, + api_based_companies.app_category, + api_based_companies.company_id, + api_based_companies.parent_id, + api_based_companies.ad_domain, + api_based_companies.tag_source + FROM api_based_companies + UNION ALL + SELECT sdk_based_companies.store_app, + sdk_based_companies.app_category, + sdk_based_companies.company_id, + sdk_based_companies.parent_id, + sdk_based_companies.ad_domain, + sdk_based_companies.tag_source + FROM sdk_based_companies + ) + SELECT cs.ad_domain, + cs.store_app, + sa.category AS app_category, + c.id AS company_id, + COALESCE(c.parent_company_id, c.id) AS parent_id, + CASE + WHEN (sa.sdk_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'sdk'::text)) + ELSE NULL::boolean + END AS sdk, + CASE + WHEN (sa.api_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'api_call'::text)) + ELSE NULL::boolean + END AS api_call, + bool_or((cs.tag_source = 'app_ads_direct'::text)) AS app_ads_direct, + bool_or((cs.tag_source = 'app_ads_reseller'::text)) AS app_ads_reseller + FROM (((combined_sources cs + LEFT JOIN frontend.store_apps_overview sa ON ((cs.store_app = sa.id))) + LEFT JOIN public.domains ad ON (((cs.ad_domain)::text = (ad.domain_name)::text))) + LEFT JOIN adtech.companies c ON ((ad.id = c.domain_id))) + GROUP BY cs.ad_domain, cs.store_app, sa.category, c.id, c.parent_company_id, 
sa.sdk_successful_last_crawled, sa.api_successful_last_crawled + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h1 OWNER TO postgres; + +-- +-- Name: store_app_sdk_strings_2025_h2; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h2 AS + WITH latest_version_codes AS ( + SELECT DISTINCT ON (vc_1.store_app) vc_1.id, + vc_1.store_app, + vc_1.version_code, + vc_1.updated_at, + vc_1.crawl_result + FROM (public.version_codes vc_1 + JOIN public.version_code_sdk_scan_results vcssr ON ((vc_1.id = vcssr.version_code_id))) + WHERE ((vcssr.scan_result = 1) AND (vc_1.updated_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (vc_1.updated_at < '2026-01-01 00:00:00'::timestamp without time zone)) + ORDER BY vc_1.store_app, (string_to_array((vc_1.version_code)::text, '.'::text))::bigint[] DESC + ) + SELECT vc.store_app, + vdm.string_id AS version_string_id, + sd.id AS sdk_id + FROM (((latest_version_codes vc + JOIN public.version_details_map vdm ON ((vc.id = vdm.version_code))) + JOIN adtech.sdk_strings css ON ((vdm.string_id = css.version_string_id))) + JOIN adtech.sdks sd ON ((css.sdk_id = sd.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.store_app_sdk_strings_2025_h2 OWNER TO postgres; + +-- +-- Name: combined_store_apps_companies_2025_h2; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h2 AS + WITH api_based_companies AS ( + SELECT DISTINCT saac.store_app, + cm.mapped_category AS app_category, + cdm.company_id, + c_1.parent_company_id AS parent_id, + 'api_call'::text AS tag_source, + COALESCE(cad_1.domain_name, (saac.tld_url)::character varying) AS ad_domain + FROM ((((((public.api_calls saac + LEFT JOIN public.store_apps sa_1 ON ((saac.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + 
LEFT JOIN public.domains ad_1 ON ((saac.tld_url = (ad_1.domain_name)::text))) + LEFT JOIN adtech.company_domain_mapping cdm ON ((ad_1.id = cdm.domain_id))) + LEFT JOIN adtech.companies c_1 ON ((cdm.company_id = c_1.id))) + LEFT JOIN public.domains cad_1 ON ((c_1.domain_id = cad_1.id))) + WHERE ((saac.called_at >= '2025-01-01 00:00:00'::timestamp without time zone) AND (saac.called_at < '2026-01-01 00:00:00'::timestamp without time zone)) + ), developer_based_companies AS ( + SELECT DISTINCT sa_1.id AS store_app, + cm.mapped_category AS app_category, + cd.company_id, + d.domain_name AS ad_domain, + 'developer'::text AS tag_source, + COALESCE(c_1.parent_company_id, cd.company_id) AS parent_id + FROM ((((adtech.company_developers cd + LEFT JOIN public.store_apps sa_1 ON ((cd.developer_id = sa_1.developer))) + LEFT JOIN adtech.companies c_1 ON ((cd.company_id = c_1.id))) + LEFT JOIN public.domains d ON ((c_1.domain_id = d.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), sdk_based_companies AS ( + SELECT DISTINCT sasd.store_app, + cm.mapped_category AS app_category, + sac.company_id, + ad_1.domain_name AS ad_domain, + 'sdk'::text AS tag_source, + COALESCE(c_1.parent_company_id, sac.company_id) AS parent_id + FROM (((((adtech.store_app_sdk_strings_2025_h2 sasd + LEFT JOIN adtech.sdks sac ON ((sac.id = sasd.sdk_id))) + LEFT JOIN adtech.companies c_1 ON ((sac.company_id = c_1.id))) + LEFT JOIN public.domains ad_1 ON ((c_1.domain_id = ad_1.id))) + LEFT JOIN public.store_apps sa_1 ON ((sasd.store_app = sa_1.id))) + LEFT JOIN public.category_mapping cm ON (((sa_1.category)::text = (cm.original_category)::text))) + ), distinct_ad_and_pub_domains AS ( + SELECT DISTINCT pd.domain_name AS publisher_domain_url, + ad_1.domain_name AS ad_domain_url, + aae.relationship + FROM ((((public.app_ads_entrys aae + LEFT JOIN public.domains ad_1 ON ((aae.ad_domain = ad_1.id))) + LEFT JOIN public.app_ads_map aam ON ((aae.id = 
aam.app_ads_entry))) + LEFT JOIN public.domains pd ON ((aam.pub_domain = pd.id))) + LEFT JOIN public.adstxt_crawl_results pdcr ON ((pd.id = pdcr.domain_id))) + WHERE ((pdcr.crawled_at - aam.updated_at) < '01:00:00'::interval) + ), combined_sources AS ( + SELECT api_based_companies.store_app, + api_based_companies.app_category, + api_based_companies.company_id, + api_based_companies.parent_id, + api_based_companies.ad_domain, + api_based_companies.tag_source + FROM api_based_companies + UNION ALL + SELECT sdk_based_companies.store_app, + sdk_based_companies.app_category, + sdk_based_companies.company_id, + sdk_based_companies.parent_id, + sdk_based_companies.ad_domain, + sdk_based_companies.tag_source + FROM sdk_based_companies + ) + SELECT cs.ad_domain, + cs.store_app, + sa.category AS app_category, + c.id AS company_id, + COALESCE(c.parent_company_id, c.id) AS parent_id, + CASE + WHEN (sa.sdk_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'sdk'::text)) + ELSE NULL::boolean + END AS sdk, + CASE + WHEN (sa.api_successful_last_crawled IS NOT NULL) THEN bool_or((cs.tag_source = 'api_call'::text)) + ELSE NULL::boolean + END AS api_call, + bool_or((cs.tag_source = 'app_ads_direct'::text)) AS app_ads_direct, + bool_or((cs.tag_source = 'app_ads_reseller'::text)) AS app_ads_reseller + FROM (((combined_sources cs + LEFT JOIN frontend.store_apps_overview sa ON ((cs.store_app = sa.id))) + LEFT JOIN public.domains ad ON (((cs.ad_domain)::text = (ad.domain_name)::text))) + LEFT JOIN adtech.companies c ON ((ad.id = c.domain_id))) + GROUP BY cs.ad_domain, cs.store_app, sa.category, c.id, c.parent_company_id, sa.sdk_successful_last_crawled, sa.api_successful_last_crawled + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.combined_store_apps_companies_2025_h2 OWNER TO postgres; + -- -- Name: combined_store_apps_parent_companies; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres -- @@ -1419,6 +1696,171 @@ CREATE MATERIALIZED VIEW adtech.company_categories 
AS ALTER MATERIALIZED VIEW adtech.company_categories OWNER TO postgres; +-- +-- Name: company_mediation_adapters; Type: TABLE; Schema: adtech; Owner: postgres +-- + +CREATE TABLE adtech.company_mediation_adapters ( + company_id integer NOT NULL, + adapter_pattern character varying(100) NOT NULL +); + + +ALTER TABLE adtech.company_mediation_adapters OWNER TO postgres; + +-- +-- Name: store_app_ranks_weekly; Type: TABLE; Schema: frontend; Owner: postgres +-- + +CREATE TABLE frontend.store_app_ranks_weekly ( + rank smallint NOT NULL, + best_rank smallint NOT NULL, + country smallint NOT NULL, + store_collection smallint NOT NULL, + store_category smallint NOT NULL, + crawled_date date NOT NULL, + store_app integer NOT NULL +); + + +ALTER TABLE frontend.store_app_ranks_weekly OWNER TO postgres; + +-- +-- Name: company_share_change_2025; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.company_share_change_2025 AS + WITH limit_apps AS ( + SELECT DISTINCT store_app_ranks_weekly.store_app + FROM frontend.store_app_ranks_weekly + WHERE ((store_app_ranks_weekly.crawled_date >= '2025-01-01'::date) AND (store_app_ranks_weekly.crawled_date < '2026-01-01'::date)) + ), totals AS ( + SELECT 'h1'::text AS half, + count(DISTINCT combined_store_apps_companies_2025_h1.store_app) AS total_apps + FROM adtech.combined_store_apps_companies_2025_h1 + WHERE (combined_store_apps_companies_2025_h1.sdk AND (combined_store_apps_companies_2025_h1.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + UNION ALL + SELECT 'h2'::text AS half, + count(DISTINCT combined_store_apps_companies_2025_h2.store_app) AS total_apps + FROM adtech.combined_store_apps_companies_2025_h2 + WHERE (combined_store_apps_companies_2025_h2.sdk AND (combined_store_apps_companies_2025_h2.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + ), domain_counts AS ( + SELECT 'h1'::text AS half, + combined_store_apps_companies_2025_h1.ad_domain, + 
count(DISTINCT combined_store_apps_companies_2025_h1.store_app) AS app_count + FROM adtech.combined_store_apps_companies_2025_h1 + WHERE (combined_store_apps_companies_2025_h1.sdk AND (combined_store_apps_companies_2025_h1.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + GROUP BY combined_store_apps_companies_2025_h1.ad_domain + UNION ALL + SELECT 'h2'::text AS half, + combined_store_apps_companies_2025_h2.ad_domain, + count(DISTINCT combined_store_apps_companies_2025_h2.store_app) AS app_count + FROM adtech.combined_store_apps_companies_2025_h2 + WHERE (combined_store_apps_companies_2025_h2.sdk AND (combined_store_apps_companies_2025_h2.store_app IN ( SELECT limit_apps.store_app + FROM limit_apps))) + GROUP BY combined_store_apps_companies_2025_h2.ad_domain + ), shares AS ( + SELECT d.half, + d.ad_domain, + d.app_count, + t.total_apps, + ((d.app_count)::numeric / (NULLIF(t.total_apps, 0))::numeric) AS pct_share + FROM (domain_counts d + JOIN totals t ON ((t.half = d.half))) + ), shares_h1 AS ( + SELECT shares.half, + shares.ad_domain, + shares.app_count, + shares.total_apps, + shares.pct_share + FROM shares + WHERE (shares.half = 'h1'::text) + ), shares_h2 AS ( + SELECT shares.half, + shares.ad_domain, + shares.app_count, + shares.total_apps, + shares.pct_share + FROM shares + WHERE (shares.half = 'h2'::text) + ) + SELECT COALESCE(s2.ad_domain, s1.ad_domain) AS ad_domain, + s1.app_count AS apps_h1, + s1.total_apps AS total_apps_h1, + round((COALESCE(s1.pct_share, (0)::numeric) * (100)::numeric), 4) AS share_h1_pct, + s2.app_count AS apps_h2, + s2.total_apps AS total_apps_h2, + round((COALESCE(s2.pct_share, (0)::numeric) * (100)::numeric), 4) AS share_h2_pct, + (COALESCE(s2.app_count, (0)::bigint) - COALESCE(s1.app_count, (0)::bigint)) AS net_app_change, + CASE + WHEN ((s1.app_count IS NULL) OR (s1.app_count = 0)) THEN 100.00 + ELSE round(((((COALESCE(s2.app_count, (0)::bigint) - s1.app_count))::numeric / (s1.app_count)::numeric) * (100)::numeric), 
2) + END AS app_growth_pct, + round(((COALESCE(s2.pct_share, (0)::numeric) - COALESCE(s1.pct_share, (0)::numeric)) * (100)::numeric), 6) AS share_change_pp + FROM (shares_h1 s1 + FULL JOIN shares_h2 s2 ON (((s1.ad_domain)::text = (s2.ad_domain)::text))) + ORDER BY (round(((COALESCE(s2.pct_share, (0)::numeric) - COALESCE(s1.pct_share, (0)::numeric)) * (100)::numeric), 6)) DESC NULLS LAST + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.company_share_change_2025 OWNER TO postgres; + +-- +-- Name: company_shares_2025_common; Type: MATERIALIZED VIEW; Schema: adtech; Owner: postgres +-- + +CREATE MATERIALIZED VIEW adtech.company_shares_2025_common AS + WITH common_apps AS ( + SELECT h1.store_app + FROM adtech.store_app_sdk_strings_2025_h1 h1 + INTERSECT + SELECT h2.store_app + FROM adtech.store_app_sdk_strings_2025_h2 h2 + ), h1_stats AS ( + SELECT sd.company_id, + count(DISTINCT store_app_sdk_strings_2025_h1.store_app) AS h1_app_count + FROM (adtech.store_app_sdk_strings_2025_h1 + JOIN adtech.sdks sd ON ((store_app_sdk_strings_2025_h1.sdk_id = sd.id))) + WHERE (store_app_sdk_strings_2025_h1.store_app IN ( SELECT common_apps.store_app + FROM common_apps)) + GROUP BY sd.company_id + ), h2_stats AS ( + SELECT sd.company_id, + count(DISTINCT store_app_sdk_strings_2025_h2.store_app) AS h2_app_count + FROM (adtech.store_app_sdk_strings_2025_h2 + JOIN adtech.sdks sd ON ((store_app_sdk_strings_2025_h2.sdk_id = sd.id))) + WHERE (store_app_sdk_strings_2025_h2.store_app IN ( SELECT common_apps.store_app + FROM common_apps)) + GROUP BY sd.company_id + ), comb AS ( + SELECT COALESCE(h1.company_id, h2.company_id) AS sdk_company_id, + ( SELECT count(*) AS count + FROM common_apps) AS total_app_count, + h1.h1_app_count, + h2.h2_app_count, + (h2.h2_app_count - h1.h1_app_count) AS net_migration, + round((((h2.h2_app_count)::numeric / (h1.h1_app_count)::numeric) - (1)::numeric), 4) AS round + FROM (h1_stats h1 + FULL JOIN h2_stats h2 ON ((h1.company_id = h2.company_id))) + ) + SELECT 
co.sdk_company_id, + co.total_app_count, + co.h1_app_count, + co.h2_app_count, + co.net_migration, + co.round, + d.domain_name AS company_domain + FROM ((comb co + LEFT JOIN adtech.companies c ON ((co.sdk_company_id = c.id))) + LEFT JOIN public.domains d ON ((d.id = c.domain_id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW adtech.company_shares_2025_common OWNER TO postgres; + -- -- Name: sdk_packages_id_seq; Type: SEQUENCE; Schema: adtech; Owner: postgres -- @@ -1509,12 +1951,12 @@ ALTER SEQUENCE adtech.url_redirect_chains_id_seq OWNED BY adtech.url_redirect_ch CREATE TABLE adtech.urls ( id integer NOT NULL, url text NOT NULL, - url_hash character(32) GENERATED ALWAYS AS (md5(url)) STORED, domain_id integer, scheme text NOT NULL, is_deep_link boolean GENERATED ALWAYS AS ((scheme <> ALL (ARRAY['http'::text, 'https'::text, 'ftp'::text]))) STORED, created_at timestamp with time zone DEFAULT now(), - hostname text + hostname text, + url_hash character(32) ); @@ -1961,15 +2403,61 @@ ALTER MATERIALIZED VIEW frontend.api_call_countries OWNER TO postgres; CREATE TABLE frontend.app_keyword_ranks_daily ( crawled_date date NOT NULL, + store smallint NOT NULL, country smallint NOT NULL, - app_rank smallint NOT NULL, keyword_id integer NOT NULL, - store_app integer NOT NULL + store_app integer NOT NULL, + app_rank smallint NOT NULL ); ALTER TABLE frontend.app_keyword_ranks_daily OWNER TO postgres; +-- +-- Name: app_keyword_rank_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.app_keyword_rank_stats AS + WITH latest_per_country AS ( + SELECT app_keyword_ranks_daily.country, + max(app_keyword_ranks_daily.crawled_date) AS max_crawled_date + FROM frontend.app_keyword_ranks_daily + GROUP BY app_keyword_ranks_daily.country + ), d30_keywords AS ( + SELECT akr.country, + akr.store_app, + akr.keyword_id, + min(akr.app_rank) AS d30_best_rank + FROM frontend.app_keyword_ranks_daily akr + WHERE (akr.crawled_date >= (CURRENT_DATE 
- '30 days'::interval)) + GROUP BY akr.country, akr.store_app, akr.keyword_id + ), latest_ranks AS ( + SELECT kr.country, + kr.store_app, + kr.keyword_id, + kr.app_rank AS latest_app_rank + FROM (frontend.app_keyword_ranks_daily kr + JOIN latest_per_country lpc ON (((kr.country = lpc.country) AND (kr.crawled_date = lpc.max_crawled_date)))) + ), all_ranked_keywords AS ( + SELECT rk.country, + rk.store_app, + rk.keyword_id, + rk.d30_best_rank, + lk.latest_app_rank + FROM (d30_keywords rk + LEFT JOIN latest_ranks lk ON (((lk.country = rk.country) AND (lk.store_app = rk.store_app) AND (lk.keyword_id = rk.keyword_id)))) + ) + SELECT country, + store_app, + keyword_id, + d30_best_rank, + latest_app_rank + FROM all_ranked_keywords + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.app_keyword_rank_stats OWNER TO postgres; + -- -- Name: apps_new_monthly; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres -- @@ -1981,6 +2469,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -1993,7 +2482,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS sa_1.created_at, sa_1.updated_at, sa_1.crawl_result, - sa_1.icon_url_512, + sa_1.icon_url_100, sa_1.release_date, sa_1.rating_count, sa_1.featured_image_url, @@ -2007,39 +2496,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_monthly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '30 days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '45 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - 
ra.rating_count, - ra.featured_image_url, - ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - WHERE (ra.rn <= 100) + SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -2056,6 +2544,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -2068,7 +2557,7 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS sa_1.created_at, sa_1.updated_at, sa_1.crawl_result, - sa_1.icon_url_512, + sa_1.icon_url_100, sa_1.release_date, sa_1.rating_count, sa_1.featured_image_url, @@ -2082,39 +2571,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_weekly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '7 days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '11 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - ra.rating_count, - ra.featured_image_url, - 
ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - WHERE (ra.rn <= 100) + SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -2131,6 +2619,8 @@ CREATE MATERIALIZED VIEW frontend.apps_new_yearly AS sa_1.store_id, sa_1.store, sa_1.category, + sa_1.developer_name, + sa_1.icon_url_100, sa_1.rating, sa_1.installs, sa_1.installs_sum_1w, @@ -2157,39 +2647,38 @@ CREATE MATERIALIZED VIEW frontend.apps_new_yearly AS FROM frontend.store_apps_overview sa_1 WHERE ((sa_1.release_date >= (CURRENT_DATE - '365 days'::interval)) AND (sa_1.created_at >= (CURRENT_DATE - '380 days'::interval)) AND (sa_1.crawl_result = 1)) ) - SELECT ra.id, - ra.name, - ra.store_id, - ra.store, - ra.category, - ra.rating, - ra.installs, - ra.installs_sum_1w, - ra.installs_sum_4w, - ra.ratings_sum_1w, - ra.ratings_sum_4w, - ra.store_last_updated, - ra.ad_supported, - ra.in_app_purchases, - ra.created_at, - ra.updated_at, - ra.crawl_result, - sa.icon_url_100, - ra.icon_url_512, - ra.release_date, - ra.rating_count, - ra.featured_image_url, - ra.phone_image_url_1, - ra.phone_image_url_2, - ra.phone_image_url_3, - ra.tablet_image_url_1, - ra.tablet_image_url_2, - ra.tablet_image_url_3, - ra.category AS app_category, - ra.rn - FROM (rankedapps ra - LEFT JOIN public.store_apps sa ON ((ra.id = sa.id))) - 
WHERE (ra.rn <= 100) + SELECT id, + name, + store_id, + store, + category, + developer_name, + rating, + installs, + installs_sum_1w, + installs_sum_4w, + ratings_sum_1w, + ratings_sum_4w, + store_last_updated, + ad_supported, + in_app_purchases, + created_at, + updated_at, + crawl_result, + icon_url_100, + release_date, + rating_count, + featured_image_url, + phone_image_url_1, + phone_image_url_2, + phone_image_url_3, + tablet_image_url_1, + tablet_image_url_2, + tablet_image_url_3, + category AS app_category, + rn + FROM rankedapps ra + WHERE (rn <= 100) WITH NO DATA; @@ -2200,62 +2689,133 @@ ALTER MATERIALIZED VIEW frontend.apps_new_yearly OWNER TO postgres; -- CREATE MATERIALIZED VIEW frontend.category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - tag.tag_source, - sa.installs, - sa.rating_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + WITH distinct_apps_group AS ( + SELECT DISTINCT csac.store_app, + tag.tag_source + FROM (adtech.combined_store_apps_companies csac CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) WHERE (tag.present IS TRUE) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.tag_source, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS 
installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source WITH NO DATA; ALTER MATERIALIZED VIEW frontend.category_tag_stats OWNER TO postgres; +-- +-- Name: category_tag_type_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.category_tag_type_stats AS + WITH minimized_company_categories AS ( + SELECT company_categories.company_id, + min(company_categories.category_id) AS category_id + FROM adtech.company_categories + GROUP BY company_categories.company_id + ), api_and_app_ads AS ( + SELECT x.store, + x.app_category, + x.tag_source, + x.type_url_slug, + count(*) AS app_count, + sum(x.installs_sum_4w_est) AS installs_d30, + sum(x.installs_est) AS installs_total + FROM ( SELECT DISTINCT csac.store_app, + sa.store, + csac.app_category, + tag.tag_source, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying + ELSE cats.url_slug + END AS type_url_slug, + sa.installs_sum_4w_est, + sa.installs_est + FROM ((((adtech.combined_store_apps_companies csac + LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + JOIN minimized_company_categories mcc ON ((csac.company_id = mcc.company_id))) + LEFT JOIN adtech.categories cats ON ((mcc.category_id = cats.id))) + CROSS JOIN LATERAL ( VALUES ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) + WHERE ((tag.present IS TRUE) AND (sa.id IS NOT NULL))) x + GROUP BY x.store, x.app_category, x.tag_source, x.type_url_slug + ), store_app_sdks AS ( + SELECT DISTINCT 
sass.store_app, + sass.sdk_id + FROM adtech.store_app_sdk_strings sass + WHERE (sass.sdk_id IS NOT NULL) + ), sdk_and_mediation AS ( + SELECT x.store, + x.app_category, + 'sdk'::text AS tag_source, + x.type_url_slug, + count(*) AS app_count, + sum(x.installs_sum_4w_est) AS installs_d30, + sum(x.installs_est) AS installs_total + FROM ( SELECT DISTINCT sas.store_app, + sa.store, + sa.category AS app_category, + cats.url_slug AS type_url_slug, + sa.installs_sum_4w_est, + sa.installs_est + FROM (((store_app_sdks sas + LEFT JOIN frontend.store_apps_overview sa ON ((sas.store_app = sa.id))) + LEFT JOIN adtech.sdk_categories sc ON ((sas.sdk_id = sc.sdk_id))) + LEFT JOIN adtech.categories cats ON ((sc.category_id = cats.id))) + WHERE (sa.id IS NOT NULL)) x + GROUP BY x.store, x.app_category, x.type_url_slug + ) + SELECT api_and_app_ads.store, + api_and_app_ads.app_category, + api_and_app_ads.tag_source, + api_and_app_ads.type_url_slug, + api_and_app_ads.app_count, + api_and_app_ads.installs_d30, + api_and_app_ads.installs_total + FROM api_and_app_ads +UNION ALL + SELECT sdk_and_mediation.store, + sdk_and_mediation.app_category, + sdk_and_mediation.tag_source, + sdk_and_mediation.type_url_slug, + sdk_and_mediation.app_count, + sdk_and_mediation.installs_d30, + sdk_and_mediation.installs_total + FROM sdk_and_mediation + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.category_tag_type_stats OWNER TO postgres; + -- -- Name: companies_apps_overview; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres -- CREATE MATERIALIZED VIEW frontend.companies_apps_overview AS - WITH store_app_sdk_companies AS ( + WITH store_app_sdk_company_category AS ( SELECT DISTINCT savs.store_app, - savs.company_id - FROM adtech.store_app_sdk_strings savs + sd.company_id, + sc.category_id + FROM ((adtech.store_app_sdk_strings savs + LEFT JOIN adtech.sdks sd ON ((savs.sdk_id = sd.id))) + JOIN adtech.sdk_categories sc ON ((savs.sdk_id = sc.sdk_id))) ) SELECT sa.store_id, sacs.company_id, 
c.name AS company_name, d.domain_name AS company_domain, cc2.url_slug AS category_slug - FROM (((((store_app_sdk_companies sacs + FROM ((((store_app_sdk_company_category sacs LEFT JOIN public.store_apps sa ON ((sacs.store_app = sa.id))) LEFT JOIN adtech.companies c ON ((sacs.company_id = c.id))) LEFT JOIN public.domains d ON ((c.domain_id = d.id))) - LEFT JOIN adtech.company_categories cc ON ((c.id = cc.company_id))) - LEFT JOIN adtech.categories cc2 ON ((cc.category_id = cc2.id))) + LEFT JOIN adtech.categories cc2 ON ((sacs.category_id = cc2.id))) WHERE (sacs.company_id IS NOT NULL) WITH NO DATA; @@ -2267,237 +2827,149 @@ ALTER MATERIALIZED VIEW frontend.companies_apps_overview OWNER TO postgres; -- CREATE MATERIALIZED VIEW frontend.companies_category_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - csac.ad_domain AS company_domain, - c.name AS company_name, - sa.installs, - sa.rating_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) - ) - SELECT dag.store, - dag.app_category, - dag.company_domain, - dag.company_name, - count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total - FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.company_domain, dag.company_name - WITH NO 
DATA; - - -ALTER MATERIALIZED VIEW frontend.companies_category_stats OWNER TO postgres; - --- --- Name: companies_category_tag_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - -CREATE MATERIALIZED VIEW frontend.companies_category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - tag.tag_source, - csac.ad_domain AS company_domain, - c.name AS company_name, - sa.installs, - sa.rating_count - FROM (((adtech.combined_store_apps_companies csac - LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) - CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) - WHERE (tag.present IS TRUE) - ) - SELECT dag.store, - dag.app_category, - dag.tag_source, - dag.company_domain, - dag.company_name, - count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total - FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source, dag.company_domain, dag.company_name - WITH NO DATA; - - -ALTER MATERIALIZED VIEW frontend.companies_category_tag_stats OWNER TO postgres; - --- --- Name: companies_category_tag_type_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - 
-CREATE MATERIALIZED VIEW frontend.companies_category_tag_type_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ) SELECT sa.store, - csac.app_category, - tag.tag_source, + sa.category AS app_category, csac.ad_domain AS company_domain, c.name AS company_name, - CASE - WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying - ELSE cats.url_slug - END AS type_url_slug, count(DISTINCT csac.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(sa.installs) AS installs_total, - sum(sa.rating_count) AS rating_count_total - FROM ((((((adtech.combined_store_apps_companies csac + sum(sa.installs_est) AS installs_total, + sum(sa.installs_sum_4w_est) AS installs_d30 + FROM ((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) - LEFT JOIN d30_counts dc ON ((csac.store_app = dc.store_app))) - LEFT JOIN adtech.company_categories ccats ON ((csac.company_id = ccats.company_id))) - LEFT JOIN adtech.categories cats ON ((ccats.category_id = cats.id))) - CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) - WHERE (tag.present IS TRUE) - GROUP BY sa.store, csac.app_category, tag.tag_source, csac.ad_domain, c.name, - CASE - WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying - ELSE cats.url_slug - END + GROUP BY sa.store, sa.category, csac.ad_domain, c.name WITH NO DATA; 
-ALTER MATERIALIZED VIEW frontend.companies_category_tag_type_stats OWNER TO postgres; - --- --- Name: companies_creative_rankings; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - -CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS - WITH creative_rankings AS ( - SELECT ca.file_extension, - ac_1.id AS api_call_id, - cr.advertiser_store_app_id, - cr.creative_initial_domain_id, - cr.creative_host_domain_id, - cr.additional_ad_domain_ids, - vcasr.run_at, - ca.md5_hash, - COALESCE(ca.phash, ca.md5_hash) AS vhash - FROM (((public.creative_records cr - LEFT JOIN public.creative_assets ca ON ((cr.creative_asset_id = ca.id))) - LEFT JOIN public.api_calls ac_1 ON ((cr.api_call_id = ac_1.id))) - LEFT JOIN public.version_code_api_scan_results vcasr ON ((ac_1.run_id = vcasr.id))) - ), combined_domains AS ( - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - cr.creative_initial_domain_id AS domain_id, - cr.advertiser_store_app_id, - cr.run_at - FROM creative_rankings cr - UNION - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - cr.creative_host_domain_id, - cr.advertiser_store_app_id, - cr.run_at - FROM creative_rankings cr - UNION - SELECT cr.api_call_id, - cr.vhash, - cr.md5_hash, - cr.file_extension, - unnest(cr.additional_ad_domain_ids) AS unnest, - cr.advertiser_store_app_id, - cr.run_at - FROM creative_rankings cr - ), visually_distinct AS ( - SELECT cdm.company_id, - cd.file_extension, - cd.advertiser_store_app_id, - cd.vhash, - min((cd.md5_hash)::text) AS md5_hash, - max(cd.api_call_id) AS last_api_call_id, - max(cd.run_at) AS last_seen - FROM (combined_domains cd - LEFT JOIN adtech.company_domain_mapping cdm ON ((cd.domain_id = cdm.domain_id))) - GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.vhash +ALTER MATERIALIZED VIEW frontend.companies_category_stats OWNER TO postgres; + +-- +-- Name: companies_category_tag_stats; Type: MATERIALIZED VIEW; Schema: frontend; 
Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.companies_category_tag_stats AS + WITH distinct_apps_group AS ( + SELECT csac.store_app, + csac.app_category, + tag.tag_source, + csac.ad_domain AS company_domain, + c.name AS company_name + FROM ((adtech.combined_store_apps_companies csac + LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) + CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) + WHERE (tag.present IS TRUE) ) - SELECT vd.company_id, - vd.md5_hash, - vd.file_extension, - ad.domain_name AS company_domain, - saa.name AS advertiser_name, - saa.store, - saa.store_id AS advertiser_store_id, - sap.store_id AS publisher_store_id, - sap.name AS publisher_name, - saa.installs, - saa.rating_count, - saa.rating, - saa.installs_sum_1w, - saa.ratings_sum_1w, - saa.installs_sum_4w, - saa.ratings_sum_4w, - vd.last_seen, - CASE - WHEN (saa.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', saa.store_id, '/', saa.icon_url_100))::character varying - ELSE saa.icon_url_512 - END AS advertiser_icon_url, - CASE - WHEN (sap.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', sap.store_id, '/', sap.icon_url_100))::character varying - ELSE sap.icon_url_512 - END AS publisher_icon_url - FROM (((((visually_distinct vd - LEFT JOIN public.api_calls ac ON ((vd.last_api_call_id = ac.id))) - LEFT JOIN adtech.companies c ON ((vd.company_id = c.id))) - LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview saa ON ((vd.advertiser_store_app_id = saa.id))) - LEFT JOIN frontend.store_apps_overview sap ON ((ac.store_app = sap.id))) - WHERE (c.id IS NOT NULL) - ORDER BY vd.last_seen DESC + SELECT sa.store, + sa.category AS app_category, + dag.tag_source, + dag.company_domain, + dag.company_name, + count(DISTINCT 
dag.store_app) AS app_count, + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total + FROM (distinct_apps_group dag + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source, dag.company_domain, dag.company_name WITH NO DATA; -ALTER MATERIALIZED VIEW frontend.companies_creative_rankings OWNER TO postgres; +ALTER MATERIALIZED VIEW frontend.companies_category_tag_stats OWNER TO postgres; + +-- +-- Name: companies_category_tag_type_stats; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.companies_category_tag_type_stats AS + WITH minimized_company_categories AS ( + SELECT company_categories.company_id, + min(company_categories.category_id) AS category_id + FROM adtech.company_categories + GROUP BY company_categories.company_id + ), api_and_app_ads AS ( + SELECT sa.store, + csac.app_category, + tag.tag_source, + csac.ad_domain AS company_domain, + c.name AS company_name, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 'ad-networks'::character varying + ELSE cats.url_slug + END AS type_url_slug, + count(DISTINCT csac.store_app) AS app_count, + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total + FROM (((((adtech.combined_store_apps_companies csac + LEFT JOIN adtech.companies c ON ((csac.company_id = c.id))) + LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) + LEFT JOIN minimized_company_categories mcc ON ((csac.company_id = mcc.company_id))) + LEFT JOIN adtech.categories cats ON ((mcc.category_id = cats.id))) + CROSS JOIN LATERAL ( VALUES ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) + WHERE (tag.present IS TRUE) + GROUP BY sa.store, csac.app_category, tag.tag_source, csac.ad_domain, c.name, + CASE + WHEN (tag.tag_source ~~ 'app_ads%'::text) THEN 
'ad-networks'::character varying + ELSE cats.url_slug + END + ), store_app_sdks AS ( + SELECT DISTINCT sass.store_app, + sass.sdk_id + FROM adtech.store_app_sdk_strings sass + WHERE (sass.sdk_id IS NOT NULL) + ), sdk_and_mediation AS ( + SELECT sa.store, + sa.category AS app_category, + 'sdk'::text AS tag_source, + d.domain_name AS company_domain, + c.name AS company_name, + cats.url_slug AS type_url_slug, + count(DISTINCT sas.store_app) AS app_count, + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total + FROM ((((((store_app_sdks sas + LEFT JOIN adtech.sdks s ON ((sas.sdk_id = s.id))) + LEFT JOIN adtech.companies c ON ((s.company_id = c.id))) + LEFT JOIN public.domains d ON ((c.domain_id = d.id))) + LEFT JOIN frontend.store_apps_overview sa ON ((sas.store_app = sa.id))) + LEFT JOIN adtech.sdk_categories sc ON ((sas.sdk_id = sc.sdk_id))) + LEFT JOIN adtech.categories cats ON ((sc.category_id = cats.id))) + GROUP BY sa.store, sa.category, 'sdk'::text, d.domain_name, c.name, cats.url_slug + ) + SELECT api_and_app_ads.store, + api_and_app_ads.app_category, + api_and_app_ads.tag_source, + api_and_app_ads.company_domain, + api_and_app_ads.company_name, + api_and_app_ads.type_url_slug, + api_and_app_ads.app_count, + api_and_app_ads.installs_d30, + api_and_app_ads.installs_total + FROM api_and_app_ads +UNION ALL + SELECT sdk_and_mediation.store, + sdk_and_mediation.app_category, + sdk_and_mediation.tag_source, + sdk_and_mediation.company_domain, + sdk_and_mediation.company_name, + sdk_and_mediation.type_url_slug, + sdk_and_mediation.app_count, + sdk_and_mediation.installs_d30, + sdk_and_mediation.installs_total + FROM sdk_and_mediation + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.companies_category_tag_type_stats OWNER TO postgres; -- --- Name: companies_creative_rankings_new; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- Name: companies_creative_rankings; Type: MATERIALIZED VIEW; Schema: frontend; Owner: 
postgres -- -CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS +CREATE MATERIALIZED VIEW frontend.companies_creative_rankings AS WITH creative_rankings AS ( SELECT ca.file_extension, ac_1.id AS api_call_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.creative_initial_domain_id, cr.creative_host_domain_id, cr.additional_ad_domain_ids, @@ -2515,6 +2987,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS cr.file_extension, cr.creative_initial_domain_id AS domain_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr UNION @@ -2524,6 +2997,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS cr.file_extension, cr.creative_host_domain_id, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr UNION @@ -2533,19 +3007,21 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS cr.file_extension, unnest(cr.additional_ad_domain_ids) AS unnest, cr.advertiser_store_app_id, + cr.advertiser_domain_id, cr.run_at FROM creative_rankings cr ), visually_distinct AS ( SELECT cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, + cd.advertiser_domain_id, cd.vhash, min((cd.md5_hash)::text) AS md5_hash, max(cd.api_call_id) AS last_api_call_id, max(cd.run_at) AS last_seen FROM (combined_domains cd LEFT JOIN adtech.company_domain_mapping cdm ON ((cd.domain_id = cdm.domain_id))) - GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.vhash + GROUP BY cdm.company_id, cd.file_extension, cd.advertiser_store_app_id, cd.advertiser_domain_id, cd.vhash ) SELECT vd.company_id, vd.md5_hash, @@ -2554,6 +3030,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS saa.name AS advertiser_name, saa.store, saa.store_id AS advertiser_store_id, + adv.domain_name AS advertiser_domain_name, sap.store_id AS publisher_store_id, sap.name AS publisher_name, saa.installs, @@ -2572,10 +3049,11 @@ 
CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS WHEN (sap.icon_url_100 IS NOT NULL) THEN (concat('https://media.appgoblin.info/app-icons/', sap.store_id, '/', sap.icon_url_100))::character varying ELSE sap.icon_url_512 END AS publisher_icon_url - FROM (((((visually_distinct vd + FROM ((((((visually_distinct vd LEFT JOIN public.api_calls ac ON ((vd.last_api_call_id = ac.id))) LEFT JOIN adtech.companies c ON ((vd.company_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) + LEFT JOIN public.domains adv ON ((vd.advertiser_domain_id = adv.id))) LEFT JOIN frontend.store_apps_overview saa ON ((vd.advertiser_store_app_id = saa.id))) LEFT JOIN frontend.store_apps_overview sap ON ((ac.store_app = sap.id))) WHERE (c.id IS NOT NULL) @@ -2583,7 +3061,7 @@ CREATE MATERIALIZED VIEW frontend.companies_creative_rankings_new AS WITH NO DATA; -ALTER MATERIALIZED VIEW frontend.companies_creative_rankings_new OWNER TO postgres; +ALTER MATERIALIZED VIEW frontend.companies_creative_rankings OWNER TO postgres; -- -- Name: companies_open_source_percent; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres @@ -2610,42 +3088,28 @@ ALTER MATERIALIZED VIEW frontend.companies_open_source_percent OWNER TO postgres -- CREATE MATERIALIZED VIEW frontend.companies_parent_category_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, - c.name AS company_name, - sa.installs, - sa.rating_count, - COALESCE(ad.domain_name, csac.ad_domain) AS company_domain - FROM (((adtech.combined_store_apps_companies csac + WITH distinct_apps_group AS ( + SELECT 
DISTINCT csac.store_app, + COALESCE(ad.domain_name, csac.ad_domain) AS company_domain, + c.name AS company_name + FROM ((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.parent_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) WHERE (csac.parent_id IN ( SELECT DISTINCT pc.id FROM (adtech.companies pc LEFT JOIN adtech.companies c_1 ON ((pc.id = c_1.parent_company_id))) WHERE (c_1.id IS NOT NULL))) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.company_domain, dag.company_name, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.company_domain, dag.company_name + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.company_domain, dag.company_name WITH NO DATA; @@ -2656,45 +3120,31 @@ ALTER MATERIALIZED VIEW frontend.companies_parent_category_stats OWNER TO postgr -- CREATE MATERIALIZED VIEW frontend.companies_parent_category_tag_stats AS - WITH d30_counts AS ( - SELECT sahw.store_app, - sum(sahw.installs_diff) AS d30_installs, - sum(sahw.rating_count_diff) AS d30_rating_count - FROM public.app_global_metrics_weekly_diffs sahw - WHERE ((sahw.week_start > (CURRENT_DATE - '31 days'::interval)) AND ((sahw.installs_diff > (0)::numeric) OR (sahw.rating_count_diff > (0)::numeric))) - GROUP BY sahw.store_app - ), distinct_apps_group AS ( - SELECT sa.store, - csac.store_app, - csac.app_category, + WITH distinct_apps_group AS ( + SELECT DISTINCT csac.store_app, 
tag.tag_source, c.name AS company_name, - sa.installs, - sa.rating_count, COALESCE(ad.domain_name, csac.ad_domain) AS company_domain - FROM ((((adtech.combined_store_apps_companies csac + FROM (((adtech.combined_store_apps_companies csac LEFT JOIN adtech.companies c ON ((csac.parent_id = c.id))) LEFT JOIN public.domains ad ON ((c.domain_id = ad.id))) - LEFT JOIN frontend.store_apps_overview sa ON ((csac.store_app = sa.id))) CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) WHERE ((tag.present IS TRUE) AND (csac.parent_id IN ( SELECT DISTINCT pc.id FROM (adtech.companies pc LEFT JOIN adtech.companies c_1 ON ((pc.id = c_1.parent_company_id))) WHERE (c_1.id IS NOT NULL)))) ) - SELECT dag.store, - dag.app_category, + SELECT sa.store, + sa.category AS app_category, dag.tag_source, dag.company_domain, dag.company_name, count(DISTINCT dag.store_app) AS app_count, - sum(dc.d30_installs) AS installs_d30, - sum(dc.d30_rating_count) AS rating_count_d30, - sum(dag.installs) AS installs_total, - sum(dag.rating_count) AS rating_count_total + sum(sa.installs_sum_4w_est) AS installs_d30, + sum(sa.installs_est) AS installs_total FROM (distinct_apps_group dag - LEFT JOIN d30_counts dc ON ((dag.store_app = dc.store_app))) - GROUP BY dag.store, dag.app_category, dag.tag_source, dag.company_domain, dag.company_name + LEFT JOIN frontend.store_apps_overview sa ON ((dag.store_app = sa.id))) + GROUP BY sa.store, sa.category, dag.tag_source, dag.company_domain, dag.company_name WITH NO DATA; @@ -2843,6 +3293,8 @@ CREATE MATERIALIZED VIEW frontend.company_parent_top_apps AS sa.name, sa.store_id, csapc.app_category, + sa.developer_name, + sa.icon_url_100, sa.installs_sum_4w AS installs_d30, sa.ratings_sum_4w AS rating_count_d30, csapc.sdk, @@ -2858,6 +3310,8 @@ CREATE MATERIALIZED VIEW frontend.company_parent_top_apps AS 
deduped_data.store, deduped_data.name, deduped_data.store_id, + deduped_data.developer_name, + deduped_data.icon_url_100, deduped_data.app_category, deduped_data.installs_d30, deduped_data.rating_count_d30, @@ -2873,7 +3327,9 @@ CREATE MATERIALIZED VIEW frontend.company_parent_top_apps AS store, name, store_id, + developer_name, app_category, + icon_url_100, installs_d30, rating_count_d30, sdk, @@ -2900,6 +3356,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS sa.name, sa.store_id, cac.app_category, + sa.developer_name, + sa.icon_url_100, sa.installs_sum_4w AS installs_d30, sa.ratings_sum_4w AS rating_count_d30, cac.sdk, @@ -2915,6 +3373,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS deduped_data.store, deduped_data.name, deduped_data.store_id, + deduped_data.developer_name, + deduped_data.icon_url_100, deduped_data.app_category, deduped_data.installs_d30, deduped_data.rating_count_d30, @@ -2930,6 +3390,8 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS store, name, store_id, + developer_name, + icon_url_100, app_category, installs_d30, rating_count_d30, @@ -2946,18 +3408,18 @@ CREATE MATERIALIZED VIEW frontend.company_top_apps AS ALTER MATERIALIZED VIEW frontend.company_top_apps OWNER TO postgres; -- --- Name: description_keywords; Type: TABLE; Schema: public; Owner: postgres +-- Name: app_keywords_extracted; Type: TABLE; Schema: public; Owner: postgres -- -CREATE TABLE public.description_keywords ( - id integer NOT NULL, - description_id integer NOT NULL, +CREATE TABLE public.app_keywords_extracted ( + store_app integer NOT NULL, keyword_id integer NOT NULL, - extracted_at timestamp without time zone DEFAULT now() NOT NULL + description_id integer NOT NULL, + extracted_at timestamp without time zone NOT NULL ); -ALTER TABLE public.description_keywords OWNER TO postgres; +ALTER TABLE public.app_keywords_extracted OWNER TO postgres; -- -- Name: keywords; Type: TABLE; Schema: public; Owner: postgres @@ -2976,40 +3438,106 @@ ALTER TABLE 
public.keywords OWNER TO postgres; -- CREATE MATERIALIZED VIEW frontend.keyword_scores AS - WITH latest_en_descriptions AS ( - SELECT DISTINCT ON (sad.store_app) sad.store_app, - sad.id AS description_id - FROM (public.store_apps_descriptions sad - JOIN public.description_keywords dk ON ((sad.id = dk.description_id))) - WHERE (sad.language_id = 1) - ORDER BY sad.store_app, sad.updated_at DESC - ), keyword_app_counts AS ( + WITH keyword_app_counts AS ( SELECT sa.store, k.keyword_text, - dk.keyword_id, - count(DISTINCT led.store_app) AS app_count - FROM (((latest_en_descriptions led - LEFT JOIN public.description_keywords dk ON ((led.description_id = dk.description_id))) - LEFT JOIN public.keywords k ON ((dk.keyword_id = k.id))) - LEFT JOIN public.store_apps sa ON ((led.store_app = sa.id))) - WHERE (dk.keyword_id IS NOT NULL) - GROUP BY sa.store, k.keyword_text, dk.keyword_id + ake.keyword_id, + count(DISTINCT ake.store_app) AS app_count, + array_length(string_to_array((k.keyword_text)::text, ' '::text), 1) AS word_count + FROM ((public.app_keywords_extracted ake + LEFT JOIN public.keywords k ON ((ake.keyword_id = k.id))) + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) + GROUP BY sa.store, k.keyword_text, ake.keyword_id ), total_app_count AS ( SELECT sa.store, - count(*) AS total_apps - FROM (latest_en_descriptions led - LEFT JOIN public.store_apps sa ON ((led.store_app = sa.id))) + count(DISTINCT ake.store_app) AS total_apps + FROM (public.app_keywords_extracted ake + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) GROUP BY sa.store + ), keyword_competitors AS ( + SELECT ake.keyword_id, + sa.store, + avg(COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25))) AS avg_installs, + max(COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25))) AS max_installs, + percentile_cont((0.5)::double precision) WITHIN GROUP (ORDER BY ((COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25)))::double precision)) AS median_installs, + 
avg(agml.rating) AS avg_rating, + count(*) FILTER (WHERE (COALESCE(NULLIF(agml.installs, 0), (agml.rating_count * 25)) > 1000000)) AS apps_over_1m_installs, + count(*) FILTER (WHERE ((sa.name)::text ~~* (('%'::text || (k.keyword_text)::text) || '%'::text))) AS title_matches + FROM (((public.app_keywords_extracted ake + LEFT JOIN public.store_apps sa ON ((ake.store_app = sa.id))) + LEFT JOIN public.app_global_metrics_latest agml ON ((sa.id = agml.store_app))) + LEFT JOIN public.keywords k ON ((ake.keyword_id = k.id))) + GROUP BY ake.keyword_id, sa.store, k.keyword_text + ), keyword_metrics AS ( + SELECT kac.store, + kac.keyword_text, + kac.keyword_id, + kac.app_count, + round(kc.avg_installs, 0) AS avg_installs, + tac.total_apps, + round(((100.0 * (kac.app_count)::numeric) / (NULLIF(tac.total_apps, 0))::numeric), 2) AS market_penetration_pct, + round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2) AS competitiveness_score, + kac.word_count, + CASE + WHEN (kac.word_count = 1) THEN 'short_tail'::text + WHEN (kac.word_count = 2) THEN 'medium_tail'::text + ELSE 'long_tail'::text + END AS keyword_type, + length((kac.keyword_text)::text) AS char_length, + (COALESCE(kc.avg_installs, (0)::numeric))::bigint AS avg_competitor_installs, + COALESCE(kc.max_installs, (0)::bigint) AS top_competitor_installs, + (COALESCE(kc.median_installs, (0)::double precision))::bigint AS median_competitor_installs, + COALESCE(kc.avg_rating, (0)::double precision) AS avg_competitor_rating, + COALESCE(kc.apps_over_1m_installs, (0)::bigint) AS major_competitors, + COALESCE(kc.title_matches, (0)::bigint) AS title_matches, + round(((100.0 * (COALESCE(kc.title_matches, (0)::bigint))::numeric) / (NULLIF(kac.app_count, 0))::numeric), 2) AS title_relevance_pct + FROM ((keyword_app_counts kac + LEFT JOIN total_app_count tac ON ((kac.store = tac.store))) + LEFT JOIN 
keyword_competitors kc ON (((kac.keyword_id = kc.keyword_id) AND (kac.store = kc.store)))) ) - SELECT kac.store, - kac.keyword_text, - kac.keyword_id, - kac.app_count, - tac.total_apps, - round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2) AS competitiveness_score - FROM (keyword_app_counts kac - LEFT JOIN total_app_count tac ON ((kac.store = tac.store))) - ORDER BY (round(((100)::numeric * (((1)::double precision - (ln(((tac.total_apps)::double precision / ((kac.app_count + 1))::double precision)) / ln((tac.total_apps)::double precision))))::numeric), 2)) DESC + SELECT store, + keyword_text, + keyword_id, + app_count, + avg_installs, + total_apps, + market_penetration_pct, + competitiveness_score, + word_count, + keyword_type, + char_length, + avg_competitor_installs, + top_competitor_installs, + median_competitor_installs, + avg_competitor_rating, + major_competitors, + title_matches, + title_relevance_pct, + round(LEAST((100)::numeric, ((((app_count)::numeric * 10.0) * ((100)::numeric - competitiveness_score)) / 100.0)), 2) AS volume_competition_score, + round(LEAST((100)::numeric, ((competitiveness_score * 0.6) + (LEAST((100)::numeric, ((COALESCE(avg_competitor_installs, (0)::bigint))::numeric / 100000.0)) * 0.4))), 2) AS keyword_difficulty, + round( + CASE + WHEN (app_count < 10) THEN (0)::double precision + WHEN ((major_competitors)::numeric > ((app_count)::numeric * 0.25)) THEN (20)::double precision + ELSE ((LEAST((40)::double precision, (log(((app_count + 1))::double precision) * (10)::double precision)) + ((((100)::numeric - competitiveness_score) * 0.4))::double precision) + ( + CASE + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 100000) THEN 20 + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 1000000) THEN 15 + WHEN (COALESCE(median_competitor_installs, (0)::bigint) < 10000000) THEN 10 + ELSE 
5 + END)::double precision) + END) AS opportunity_score, + CASE + WHEN (app_count > 0) THEN round(((((app_count)::numeric * 1000.0) * (1.0 / ((1)::numeric + (competitiveness_score / 50.0)))) * + CASE + WHEN (word_count = 1) THEN 2.0 + WHEN (word_count = 2) THEN 1.0 + ELSE 0.5 + END), 0) + ELSE (0)::numeric + END AS estimated_monthly_searches, + round(((100)::numeric - LEAST((100)::numeric, ((((major_competitors)::numeric * 10.0) + ((COALESCE(median_competitor_installs, (0)::bigint))::numeric / 100000.0)) + (competitiveness_score * 0.3)))), 2) AS ranking_feasibility + FROM keyword_metrics km WITH NO DATA; @@ -3058,6 +3586,57 @@ CREATE MATERIALIZED VIEW frontend.latest_sdk_scanned_apps AS ALTER MATERIALIZED VIEW frontend.latest_sdk_scanned_apps OWNER TO postgres; +-- +-- Name: mediation_adapter_app_counts; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- + +CREATE MATERIALIZED VIEW frontend.mediation_adapter_app_counts AS + WITH filter_mediation_strings AS ( + SELECT vs.id AS string_id, + sd.company_id AS mediation_company_id, + vs.value_name AS full_sdk, + regexp_replace(regexp_replace(vs.value_name, concat(cmp.mediation_pattern, '.'), ''::text), '\..*$'::text, ''::text) AS adapter_string + FROM ((public.version_strings vs + JOIN adtech.sdk_mediation_patterns cmp ON ((lower(vs.value_name) ~~ (lower(concat((cmp.mediation_pattern)::text, '.')) || '%'::text)))) + JOIN adtech.sdks sd ON ((cmp.sdk_id = sd.id))) + ), mediation_strings AS ( + SELECT fms.string_id, + fms.mediation_company_id, + cma.company_id AS adapter_company_id, + fms.adapter_string, + fms.full_sdk + FROM (filter_mediation_strings fms + LEFT JOIN adtech.company_mediation_adapters cma ON ((lower(fms.adapter_string) ~~ (lower((cma.adapter_pattern)::text) || '%'::text)))) + WHERE (fms.mediation_company_id <> cma.company_id) + ), app_counts AS ( + SELECT ms.mediation_company_id, + ms.adapter_string, + ms.adapter_company_id, + cm.mapped_category AS app_category, + count(DISTINCT sass.store_app) 
AS app_count + FROM (((adtech.store_app_sdk_strings sass + JOIN mediation_strings ms ON ((sass.version_string_id = ms.string_id))) + LEFT JOIN public.store_apps sa ON ((sass.store_app = sa.id))) + LEFT JOIN public.category_mapping cm ON (((sa.category)::text = (cm.original_category)::text))) + GROUP BY ms.mediation_company_id, ms.adapter_string, ms.adapter_company_id, cm.mapped_category + ) + SELECT md.domain_name AS mediation_domain, + ac.adapter_string, + ad.domain_name AS adapter_domain, + adc.name AS adapter_company_name, + adc.logo_url AS adapter_logo_url, + ac.app_category, + ac.app_count + FROM ((((app_counts ac + LEFT JOIN adtech.companies mdc ON ((ac.mediation_company_id = mdc.id))) + LEFT JOIN public.domains md ON ((mdc.domain_id = md.id))) + LEFT JOIN adtech.companies adc ON ((ac.adapter_company_id = adc.id))) + LEFT JOIN public.domains ad ON ((adc.domain_id = ad.id))) + WITH NO DATA; + + +ALTER MATERIALIZED VIEW frontend.mediation_adapter_app_counts OWNER TO postgres; + -- -- Name: store_app_api_companies; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres -- @@ -3092,23 +3671,6 @@ CREATE MATERIALIZED VIEW frontend.store_app_api_companies AS ALTER MATERIALIZED VIEW frontend.store_app_api_companies OWNER TO postgres; --- --- Name: store_app_ranks_weekly; Type: TABLE; Schema: frontend; Owner: postgres --- - -CREATE TABLE frontend.store_app_ranks_weekly ( - rank smallint NOT NULL, - best_rank smallint NOT NULL, - country smallint NOT NULL, - store_collection smallint NOT NULL, - store_category smallint NOT NULL, - crawled_date date NOT NULL, - store_app integer NOT NULL -); - - -ALTER TABLE frontend.store_app_ranks_weekly OWNER TO postgres; - -- -- Name: store_categories; Type: TABLE; Schema: public; Owner: postgres -- @@ -3198,6 +3760,7 @@ CREATE MATERIALIZED VIEW frontend.store_app_ranks_latest AS sa.name, sa.store_id, sa.store, + sa.developer_name, sa.installs, sa.rating_count, sa.rating, @@ -3221,10 +3784,10 @@ CREATE MATERIALIZED VIEW 
frontend.store_app_ranks_latest AS ALTER MATERIALIZED VIEW frontend.store_app_ranks_latest OWNER TO postgres; -- --- Name: store_apps_z_scores; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres +-- Name: z_scores_top_apps; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres -- -CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS +CREATE MATERIALIZED VIEW frontend.z_scores_top_apps AS WITH app_metrics AS ( SELECT app_global_metrics_latest.store_app, app_global_metrics_latest.rating, @@ -3240,23 +3803,22 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS saz.ratings_avg_2w, saz.installs_z_score_2w, saz.ratings_z_score_2w, - saz.installs_sum_4w, + sa.installs_sum_4w_est AS installs_sum_4w, saz.ratings_sum_4w, saz.installs_avg_4w, saz.ratings_avg_4w, saz.installs_z_score_4w, saz.ratings_z_score_4w, sa.id, - sa.developer, + sa.developer_id, + sa.developer_name, sa.name, sa.store_id, sa.store, - sa.category, - am.installs, + sa.category AS app_category, + sa.installs_est AS installs, sa.free, - sa.price, sa.store_last_updated, - sa.content_rating, sa.ad_supported, sa.in_app_purchases, sa.created_at, @@ -3265,9 +3827,7 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS sa.release_date, am.rating_count, sa.icon_url_100, - cm.original_category, - cm.mapped_category, - row_number() OVER (PARTITION BY sa.store, cm.mapped_category, + row_number() OVER (PARTITION BY sa.store, sa.category, CASE WHEN (sa.store = 2) THEN 'rating'::text ELSE 'installs'::text @@ -3277,15 +3837,16 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS WHEN (sa.store = 1) THEN saz.installs_z_score_2w ELSE NULL::numeric END DESC NULLS LAST) AS rn - FROM (((public.store_app_z_scores saz - LEFT JOIN public.store_apps sa ON ((saz.store_app = sa.id))) + FROM ((public.store_app_z_scores saz + LEFT JOIN frontend.store_apps_overview sa ON ((saz.store_app = sa.id))) LEFT JOIN app_metrics am ON ((saz.store_app = am.store_app))) - LEFT JOIN public.category_mapping cm 
ON (((sa.category)::text = (cm.original_category)::text))) + WHERE (sa.store = ANY (ARRAY[1, 2])) ) SELECT store, store_id, name AS app_name, - mapped_category AS app_category, + developer_name, + app_category, in_app_purchases, ad_supported, icon_url_100, @@ -3308,26 +3869,7 @@ CREATE MATERIALIZED VIEW frontend.store_apps_z_scores AS WITH NO DATA; -ALTER MATERIALIZED VIEW frontend.store_apps_z_scores OWNER TO postgres; - --- --- Name: total_categories_app_counts; Type: MATERIALIZED VIEW; Schema: frontend; Owner: postgres --- - -CREATE MATERIALIZED VIEW frontend.total_categories_app_counts AS - SELECT sa.store, - tag.tag_source, - csac.app_category, - count(DISTINCT csac.store_app) AS app_count - FROM ((adtech.combined_store_apps_companies csac - LEFT JOIN public.store_apps sa ON ((csac.store_app = sa.id))) - CROSS JOIN LATERAL ( VALUES ('sdk'::text,csac.sdk), ('api_call'::text,csac.api_call), ('app_ads_direct'::text,csac.app_ads_direct), ('app_ads_reseller'::text,csac.app_ads_reseller)) tag(tag_source, present)) - WHERE (tag.present IS TRUE) - GROUP BY sa.store, tag.tag_source, csac.app_category - WITH NO DATA; - - -ALTER MATERIALIZED VIEW frontend.total_categories_app_counts OWNER TO postgres; +ALTER MATERIALIZED VIEW frontend.z_scores_top_apps OWNER TO postgres; -- -- Name: app_country_crawls; Type: TABLE; Schema: logging; Owner: postgres @@ -3343,6 +3885,19 @@ CREATE TABLE logging.app_country_crawls ( ALTER TABLE logging.app_country_crawls OWNER TO postgres; +-- +-- Name: app_description_keywords_extracted; Type: TABLE; Schema: logging; Owner: postgres +-- + +CREATE TABLE logging.app_description_keywords_extracted ( + store_app integer NOT NULL, + description_id integer NOT NULL, + extracted_at timestamp without time zone +); + + +ALTER TABLE logging.app_description_keywords_extracted OWNER TO postgres; + -- -- Name: creative_scan_results; Type: TABLE; Schema: logging; Owner: postgres -- @@ -3456,23 +4011,6 @@ CREATE TABLE logging.store_app_waydroid_crawled_at 
( ALTER TABLE logging.store_app_waydroid_crawled_at OWNER TO postgres; --- --- Name: store_apps_audit; Type: TABLE; Schema: logging; Owner: postgres --- - -CREATE TABLE logging.store_apps_audit ( - operation character(1) NOT NULL, - stamp timestamp without time zone NOT NULL, - userid text NOT NULL, - row_id bigint NOT NULL, - store smallint NOT NULL, - store_id text NOT NULL, - crawl_result integer -); - - -ALTER TABLE logging.store_apps_audit OWNER TO postgres; - -- -- Name: store_apps_snapshot; Type: TABLE; Schema: logging; Owner: postgres -- @@ -3667,45 +4205,6 @@ CREATE MATERIALIZED VIEW public.app_country_metrics_latest AS ALTER MATERIALIZED VIEW public.app_country_metrics_latest OWNER TO postgres; --- --- Name: app_keyword_rankings; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE public.app_keyword_rankings ( - id integer NOT NULL, - crawled_date date NOT NULL, - country smallint NOT NULL, - lang smallint NOT NULL, - keyword integer NOT NULL, - rank smallint NOT NULL, - store_app integer NOT NULL -); - - -ALTER TABLE public.app_keyword_rankings OWNER TO postgres; - --- --- Name: app_keyword_rankings_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.app_keyword_rankings_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.app_keyword_rankings_id_seq OWNER TO postgres; - --- --- Name: app_keyword_rankings_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.app_keyword_rankings_id_seq OWNED BY public.app_keyword_rankings.id; - - -- -- Name: app_urls_map_id_seq; Type: SEQUENCE; Schema: public; Owner: james -- @@ -3720,27 +4219,6 @@ ALTER TABLE public.app_urls_map ALTER COLUMN id ADD GENERATED BY DEFAULT AS IDEN ); --- --- Name: audit_dates; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.audit_dates AS - WITH sa AS ( - SELECT (store_apps_audit.stamp)::date 
AS updated_date, - 'store_apps'::text AS table_name, - count(*) AS updated_count - FROM logging.store_apps_audit - GROUP BY ((store_apps_audit.stamp)::date) - ) - SELECT updated_date, - table_name, - updated_count - FROM sa - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.audit_dates OWNER TO postgres; - -- -- Name: crawl_results; Type: TABLE; Schema: public; Owner: james -- @@ -3886,28 +4364,6 @@ ALTER SEQUENCE public.creative_records_id_seq1 OWNER TO postgres; ALTER SEQUENCE public.creative_records_id_seq1 OWNED BY public.creative_records.id; --- --- Name: description_keywords_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.description_keywords_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.description_keywords_id_seq OWNER TO postgres; - --- --- Name: description_keywords_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.description_keywords_id_seq OWNED BY public.description_keywords.id; - - -- -- Name: developer_store_apps; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres -- @@ -3972,6 +4428,25 @@ ALTER SEQUENCE public.domains_id_seq OWNER TO postgres; ALTER SEQUENCE public.domains_id_seq OWNED BY public.domains.id; +-- +-- Name: global_retention_benchmarks; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.global_retention_benchmarks ( + store_id smallint NOT NULL, + app_category text NOT NULL, + d1 numeric(6,5) NOT NULL, + d7 numeric(6,5) NOT NULL, + d30 numeric(6,5) NOT NULL, + CONSTRAINT global_retention_benchmarks_d1_check CHECK (((d1 > (0)::numeric) AND (d1 <= (1)::numeric))), + CONSTRAINT global_retention_benchmarks_d30_check CHECK (((d30 > (0)::numeric) AND (d30 <= (1)::numeric))), + CONSTRAINT global_retention_benchmarks_d7_check CHECK (((d7 > (0)::numeric) AND (d7 <= (1)::numeric))), + CONSTRAINT retention_monotonic_check CHECK (((d1 >= d7) AND (d7 >= d30))) +); + + 
+ALTER TABLE public.global_retention_benchmarks OWNER TO postgres; + -- -- Name: ip_geo_snapshots_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres -- @@ -4142,36 +4617,6 @@ CREATE TABLE public.store_app_z_scores_history ( ALTER TABLE public.store_app_z_scores_history OWNER TO postgres; --- --- Name: store_apps_created_at; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.store_apps_created_at AS - WITH my_dates AS ( - SELECT num_series.store, - (generate_series((CURRENT_DATE - '365 days'::interval), (CURRENT_DATE)::timestamp without time zone, '1 day'::interval))::date AS date - FROM generate_series(1, 2, 1) num_series(store) - ), created_dates AS ( - SELECT sa.store, - (sa.created_at)::date AS created_date, - sas.crawl_source, - count(*) AS created_count - FROM (public.store_apps sa - LEFT JOIN logging.store_app_sources sas ON (((sa.id = sas.store_app) AND (sa.store = sas.store)))) - WHERE (sa.created_at >= (CURRENT_DATE - '365 days'::interval)) - GROUP BY sa.store, ((sa.created_at)::date), sas.crawl_source - ) - SELECT my_dates.store, - my_dates.date, - created_dates.crawl_source, - created_dates.created_count - FROM (my_dates - LEFT JOIN created_dates ON (((my_dates.date = created_dates.created_date) AND (my_dates.store = created_dates.store)))) - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.store_apps_created_at OWNER TO postgres; - -- -- Name: store_apps_descriptions_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres -- @@ -4221,7 +4666,7 @@ CREATE MATERIALIZED VIEW public.store_apps_in_latest_rankings AS sa.installs, sa.rating_count, sa.store_id - FROM (frontend.store_apps_z_scores saz + FROM (frontend.z_scores_top_apps saz LEFT JOIN frontend.store_apps_overview sa ON (((saz.store_id)::text = (sa.store_id)::text))) WHERE sa.free ORDER BY COALESCE(saz.installs_z_score_2w, saz.ratings_z_score_2w) DESC @@ -4252,44 +4697,14 @@ UNION ranked_apps.store, ranked_apps.store_last_updated, ranked_apps.name, 
- ranked_apps.installs, - ranked_apps.rating_count, - ranked_apps.store_id - FROM ranked_apps - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.store_apps_in_latest_rankings OWNER TO postgres; - --- --- Name: store_apps_updated_at; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.store_apps_updated_at AS - WITH my_dates AS ( - SELECT num_series.store, - (generate_series((CURRENT_DATE - '365 days'::interval), (CURRENT_DATE)::timestamp without time zone, '1 day'::interval))::date AS date - FROM generate_series(1, 2, 1) num_series(store) - ), updated_dates AS ( - SELECT store_apps.store, - (store_apps.updated_at)::date AS last_updated_date, - count(*) AS last_updated_count - FROM public.store_apps - WHERE (store_apps.updated_at >= (CURRENT_DATE - '365 days'::interval)) - GROUP BY store_apps.store, ((store_apps.updated_at)::date) - ) - SELECT my_dates.store, - my_dates.date, - updated_dates.last_updated_count, - audit_dates.updated_count - FROM ((my_dates - LEFT JOIN updated_dates ON (((my_dates.date = updated_dates.last_updated_date) AND (my_dates.store = updated_dates.store)))) - LEFT JOIN public.audit_dates ON ((my_dates.date = audit_dates.updated_date))) - ORDER BY my_dates.date DESC + ranked_apps.installs, + ranked_apps.rating_count, + ranked_apps.store_id + FROM ranked_apps WITH NO DATA; -ALTER MATERIALIZED VIEW public.store_apps_updated_at OWNER TO postgres; +ALTER MATERIALIZED VIEW public.store_apps_in_latest_rankings OWNER TO postgres; -- -- Name: store_categories_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres @@ -4493,41 +4908,6 @@ CREATE MATERIALIZED VIEW public.total_count_overview AS ALTER MATERIALIZED VIEW public.total_count_overview OWNER TO postgres; --- --- Name: user_requested_scan; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE public.user_requested_scan ( - id integer NOT NULL, - store_id character varying NOT NULL, - created_at timestamp without time zone DEFAULT 
CURRENT_TIMESTAMP -); - - -ALTER TABLE public.user_requested_scan OWNER TO postgres; - --- --- Name: user_requested_scan_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.user_requested_scan_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.user_requested_scan_id_seq OWNER TO postgres; - --- --- Name: user_requested_scan_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.user_requested_scan_id_seq OWNED BY public.user_requested_scan.id; - - -- -- Name: version_code_api_scan_results_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres -- @@ -4663,13 +5043,6 @@ ALTER TABLE ONLY public.adstxt_crawl_results ALTER COLUMN id SET DEFAULT nextval ALTER TABLE ONLY public.api_calls ALTER COLUMN id SET DEFAULT nextval('public.api_calls_id_seq'::regclass); --- --- Name: app_keyword_rankings id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings ALTER COLUMN id SET DEFAULT nextval('public.app_keyword_rankings_id_seq'::regclass); - - -- -- Name: crawl_scenario_country_config id; Type: DEFAULT; Schema: public; Owner: postgres -- @@ -4698,13 +5071,6 @@ ALTER TABLE ONLY public.creative_assets ALTER COLUMN id SET DEFAULT nextval('pub ALTER TABLE ONLY public.creative_records ALTER COLUMN id SET DEFAULT nextval('public.creative_records_id_seq1'::regclass); --- --- Name: description_keywords id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords ALTER COLUMN id SET DEFAULT nextval('public.description_keywords_id_seq'::regclass); - - -- -- Name: domains id; Type: DEFAULT; Schema: public; Owner: postgres -- @@ -4754,13 +5120,6 @@ ALTER TABLE ONLY public.store_apps_descriptions ALTER COLUMN id SET DEFAULT next ALTER TABLE ONLY public.stores ALTER COLUMN id SET DEFAULT nextval('public.stores_column1_seq'::regclass); --- --- Name: 
user_requested_scan id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_requested_scan ALTER COLUMN id SET DEFAULT nextval('public.user_requested_scan_id_seq'::regclass); - - -- -- Name: version_code_api_scan_results id; Type: DEFAULT; Schema: public; Owner: postgres -- @@ -4831,6 +5190,22 @@ ALTER TABLE ONLY adtech.company_domain_mapping ADD CONSTRAINT company_domain_mapping_pkey PRIMARY KEY (company_id, domain_id); +-- +-- Name: company_mediation_adapters company_mediation_adapters_pkey; Type: CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.company_mediation_adapters + ADD CONSTRAINT company_mediation_adapters_pkey PRIMARY KEY (company_id, adapter_pattern); + + +-- +-- Name: sdk_mediation_patterns company_mediation_patterns_pkey; Type: CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.sdk_mediation_patterns + ADD CONSTRAINT company_mediation_patterns_pkey PRIMARY KEY (sdk_id, mediation_pattern); + + -- -- Name: sdk_categories sdk_categories_pkey; Type: CONSTRAINT; Schema: adtech; Owner: postgres -- @@ -4924,7 +5299,7 @@ ALTER TABLE ONLY adtech.urls -- ALTER TABLE ONLY frontend.app_keyword_ranks_daily - ADD CONSTRAINT app_keyword_rankings_unique_test UNIQUE (crawled_date, country, keyword_id, app_rank); + ADD CONSTRAINT app_keyword_rankings_unique_test UNIQUE (crawled_date, store, country, keyword_id, app_rank); -- @@ -4943,6 +5318,14 @@ ALTER TABLE ONLY frontend.store_app_ranks_weekly ADD CONSTRAINT app_rankings_unique_test UNIQUE (crawled_date, country, store_collection, store_category, rank); +-- +-- Name: app_description_keywords_extracted app_description_keywords_extracted_pk; Type: CONSTRAINT; Schema: logging; Owner: postgres +-- + +ALTER TABLE ONLY logging.app_description_keywords_extracted + ADD CONSTRAINT app_description_keywords_extracted_pk PRIMARY KEY (store_app, description_id); + + -- -- Name: developers_crawled_at developers_crawled_at_pk; Type: CONSTRAINT; 
Schema: logging; Owner: postgres -- @@ -5031,14 +5414,6 @@ ALTER TABLE ONLY public.app_ads_entrys ADD CONSTRAINT app_ads_txt_un UNIQUE (ad_domain, publisher_id, relationship); --- --- Name: app_keyword_rankings app_keyword_rankings_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT app_keyword_rankings_pkey PRIMARY KEY (id); - - -- -- Name: app_urls_map app_urls_map_pkey; Type: CONSTRAINT; Schema: public; Owner: james -- @@ -5152,19 +5527,11 @@ ALTER TABLE ONLY public.creative_records -- --- Name: description_keywords description_keywords_description_id_keyword_id_key; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_description_id_keyword_id_key UNIQUE (description_id, keyword_id); - - --- --- Name: description_keywords description_keywords_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- Name: app_keywords_extracted description_keywords_app_id_keyword_id_key; Type: CONSTRAINT; Schema: public; Owner: postgres -- -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_pkey PRIMARY KEY (id); +ALTER TABLE ONLY public.app_keywords_extracted + ADD CONSTRAINT description_keywords_app_id_keyword_id_key UNIQUE (store_app, keyword_id); -- @@ -5199,6 +5566,14 @@ ALTER TABLE ONLY public.domains ADD CONSTRAINT domains_pkey PRIMARY KEY (id); +-- +-- Name: global_retention_benchmarks global_retention_benchmarks_pk; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.global_retention_benchmarks + ADD CONSTRAINT global_retention_benchmarks_pk PRIMARY KEY (store_id, app_category); + + -- -- Name: ip_geo_snapshots ip_geo_snapshots_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres -- @@ -5343,22 +5718,6 @@ ALTER TABLE ONLY public.keywords ADD CONSTRAINT unique_keyword UNIQUE (keyword_text); --- --- Name: app_keyword_rankings unique_keyword_ranking; 
Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT unique_keyword_ranking UNIQUE (crawled_date, country, lang, rank, store_app, keyword); - - --- --- Name: user_requested_scan user_requested_scan_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_requested_scan - ADD CONSTRAINT user_requested_scan_pkey PRIMARY KEY (id); - - -- -- Name: version_code_api_scan_results version_code_api_scan_results_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres -- @@ -5446,13 +5805,6 @@ ALTER TABLE ONLY public.version_strings CREATE UNIQUE INDEX combined_store_app_companies_idx ON adtech.combined_store_apps_companies USING btree (ad_domain, store_app, app_category, company_id); --- --- Name: company_sdk_strings_version_string_id_company_id_idx; Type: INDEX; Schema: adtech; Owner: postgres --- - -CREATE UNIQUE INDEX company_sdk_strings_version_string_id_company_id_idx ON adtech.company_sdk_strings USING btree (version_string_id, company_id); - - -- -- Name: idx_combined_store_apps_parent_companies_idx; Type: INDEX; Schema: adtech; Owner: postgres -- @@ -5516,11 +5868,18 @@ CREATE INDEX sdk_path_pattern_idx ON adtech.sdk_paths USING btree (path_pattern) CREATE INDEX sdk_paths_path_pattern_trgm_idx ON adtech.sdk_paths USING gin (lower((path_pattern)::text) public.gin_trgm_ops); +-- +-- Name: sdk_strings_version_string_id_sdk_id_idx; Type: INDEX; Schema: adtech; Owner: postgres +-- + +CREATE UNIQUE INDEX sdk_strings_version_string_id_sdk_id_idx ON adtech.sdk_strings USING btree (version_string_id, sdk_id); + + -- -- Name: store_app_sdk_strings_idx; Type: INDEX; Schema: adtech; Owner: postgres -- -CREATE UNIQUE INDEX store_app_sdk_strings_idx ON adtech.store_app_sdk_strings USING btree (store_app, version_string_id, company_id); +CREATE UNIQUE INDEX store_app_sdk_strings_idx ON adtech.store_app_sdk_strings USING btree (store_app, version_string_id, sdk_id); -- @@ -5531,10 
+5890,10 @@ CREATE UNIQUE INDEX url_redirect_chains_unique_idx ON adtech.url_redirect_chains -- --- Name: urls_idx; Type: INDEX; Schema: adtech; Owner: postgres +-- Name: urls_url_hash_idx; Type: INDEX; Schema: adtech; Owner: postgres -- -CREATE UNIQUE INDEX urls_idx ON adtech.urls USING btree (md5(url)); +CREATE UNIQUE INDEX urls_url_hash_idx ON adtech.urls USING btree (url_hash); -- @@ -5607,6 +5966,13 @@ CREATE INDEX app_keyword_ranks_daily_app_lookup ON frontend.app_keyword_ranks_da CREATE INDEX app_keyword_ranks_daily_date ON frontend.app_keyword_ranks_daily USING btree (crawled_date); +-- +-- Name: app_keywords_delete_and_insert_on; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX app_keywords_delete_and_insert_on ON frontend.app_keyword_ranks_daily USING btree (crawled_date, store); + + -- -- Name: companies_apps_overview_idx; Type: INDEX; Schema: frontend; Owner: postgres -- @@ -5649,20 +6015,6 @@ CREATE INDEX companies_category_tag_stats__query_idx ON frontend.companies_categ CREATE UNIQUE INDEX companies_category_tag_stats_idx ON frontend.companies_category_tag_stats USING btree (store, tag_source, app_category, company_domain); --- --- Name: companies_category_tag_type_stats_idx; Type: INDEX; Schema: frontend; Owner: postgres --- - -CREATE UNIQUE INDEX companies_category_tag_type_stats_idx ON frontend.companies_category_tag_type_stats USING btree (store, tag_source, app_category, company_domain, type_url_slug); - - --- --- Name: companies_category_tag_type_stats_query_idx; Type: INDEX; Schema: frontend; Owner: postgres --- - -CREATE INDEX companies_category_tag_type_stats_query_idx ON frontend.companies_category_tag_type_stats USING btree (type_url_slug, app_category); - - -- -- Name: companies_open_source_percent_unique; Type: INDEX; Schema: frontend; Owner: postgres -- @@ -5706,10 +6058,17 @@ CREATE UNIQUE INDEX companies_sdks_overview_unique_idx ON frontend.companies_sdk -- --- Name: frontend_store_apps_z_scores_unique; Type: INDEX; 
Schema: frontend; Owner: postgres +-- Name: frontend_companies_category_tag_type_stats_unique; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE UNIQUE INDEX frontend_companies_category_tag_type_stats_unique ON frontend.companies_category_tag_type_stats USING btree (store, app_category, tag_source, company_domain, type_url_slug); + + +-- +-- Name: frontend_z_scores_top_apps_unique; Type: INDEX; Schema: frontend; Owner: postgres -- -CREATE UNIQUE INDEX frontend_store_apps_z_scores_unique ON frontend.store_apps_z_scores USING btree (store, store_id); +CREATE UNIQUE INDEX frontend_z_scores_top_apps_unique ON frontend.z_scores_top_apps USING btree (store, store_id); -- @@ -5859,13 +6218,6 @@ CREATE INDEX idx_store_app_ranks_best_monthly_store ON frontend.store_app_ranks_ CREATE UNIQUE INDEX idx_store_app_ranks_latest_filter_sort ON frontend.store_app_ranks_latest USING btree (store_collection, store_category, country, rank); --- --- Name: idx_total_categories_app_counts; Type: INDEX; Schema: frontend; Owner: postgres --- - -CREATE UNIQUE INDEX idx_total_categories_app_counts ON frontend.total_categories_app_counts USING btree (store, tag_source, app_category); - - -- -- Name: idx_unique_company_domains_top_apps; Type: INDEX; Schema: frontend; Owner: postgres -- @@ -5881,10 +6233,10 @@ CREATE UNIQUE INDEX idx_unique_company_top_apps ON frontend.company_top_apps USI -- --- Name: keyword_scores_unique; Type: INDEX; Schema: frontend; Owner: postgres +-- Name: keyword_scores_store_keyword_id_idx; Type: INDEX; Schema: frontend; Owner: postgres -- -CREATE UNIQUE INDEX keyword_scores_unique ON frontend.keyword_scores USING btree (store, keyword_id); +CREATE UNIQUE INDEX keyword_scores_store_keyword_id_idx ON frontend.keyword_scores USING btree (store, keyword_id); -- @@ -5908,6 +6260,34 @@ CREATE INDEX sarw_crawled_store_collection_category_country_idx ON frontend.stor CREATE UNIQUE INDEX store_app_ranks_best_monthly_uidx ON frontend.store_app_ranks_best_monthly 
USING btree (store_id, country, collection, category); +-- +-- Name: store_apps_overview_installs_est_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_installs_est_idx ON frontend.store_apps_overview USING btree (installs_est DESC); + + +-- +-- Name: store_apps_overview_installs_sum_4w_est_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_installs_sum_4w_est_idx ON frontend.store_apps_overview USING btree (installs_sum_4w_est DESC); + + +-- +-- Name: store_apps_overview_store_last_updated_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_store_last_updated_idx ON frontend.store_apps_overview USING btree (store_last_updated); + + +-- +-- Name: store_apps_overview_textsearch_idx; Type: INDEX; Schema: frontend; Owner: postgres +-- + +CREATE INDEX store_apps_overview_textsearch_idx ON frontend.store_apps_overview USING gin (textsearchable); + + -- -- Name: store_apps_overview_unique_idx; Type: INDEX; Schema: frontend; Owner: postgres -- @@ -5929,6 +6309,13 @@ CREATE UNIQUE INDEX store_apps_overview_unique_store_app_idx ON frontend.store_a CREATE UNIQUE INDEX store_apps_overview_unique_store_id_idx ON frontend.store_apps_overview USING btree (store_id); +-- +-- Name: app_description_keywords_extrac_description_id_extracted_at_idx; Type: INDEX; Schema: logging; Owner: postgres +-- + +CREATE INDEX app_description_keywords_extrac_description_id_extracted_at_idx ON logging.app_description_keywords_extracted USING btree (description_id, extracted_at DESC); + + -- -- Name: logging_store_app_upsert_unique; Type: INDEX; Schema: logging; Owner: postgres -- @@ -5937,10 +6324,10 @@ CREATE UNIQUE INDEX logging_store_app_upsert_unique ON logging.store_app_waydroi -- --- Name: store_apps_audit_stamp_idx; Type: INDEX; Schema: logging; Owner: postgres +-- Name: ake_latest_idx; Type: INDEX; Schema: public; Owner: postgres -- -CREATE INDEX 
store_apps_audit_stamp_idx ON logging.store_apps_audit USING btree (stamp); +CREATE INDEX ake_latest_idx ON public.app_keywords_extracted USING btree (store_app, extracted_at DESC); -- @@ -6007,10 +6394,10 @@ CREATE UNIQUE INDEX app_global_metrics_weekly_diffs_week_start_store_app_idx ON -- --- Name: audit_dates_updated_date_idx; Type: INDEX; Schema: public; Owner: postgres +-- Name: app_keywords_app_index; Type: INDEX; Schema: public; Owner: postgres -- -CREATE UNIQUE INDEX audit_dates_updated_date_idx ON public.audit_dates USING btree (updated_date, table_name); +CREATE INDEX app_keywords_app_index ON public.app_keywords_extracted USING btree (store_app); -- @@ -6118,34 +6505,6 @@ CREATE INDEX idx_ip_geo_ip_created ON public.ip_geo_snapshots USING btree (ip_ad CREATE INDEX idx_ip_geo_mitm_uuid ON public.ip_geo_snapshots USING btree (mitm_uuid); --- --- Name: idx_my_materialized_view_store_date; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX idx_my_materialized_view_store_date ON public.store_apps_updated_at USING btree (store, date); - - --- --- Name: idx_store_apps_created_at; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX idx_store_apps_created_at ON public.store_apps_created_at USING btree (store, date, crawl_source); - - --- --- Name: idx_store_apps_created_atx; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE UNIQUE INDEX idx_store_apps_created_atx ON public.store_apps_created_at USING btree (store, date, crawl_source); - - --- --- Name: idx_store_apps_updated_at; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE UNIQUE INDEX idx_store_apps_updated_at ON public.store_apps_updated_at USING btree (store, date); - - -- -- Name: store_apps_descriptions_unique_hash_idx; Type: INDEX; Schema: public; Owner: postgres -- @@ -6167,6 +6526,13 @@ CREATE INDEX store_apps_developer_idx ON public.store_apps USING btree (develope CREATE INDEX store_apps_name_idx ON public.store_apps USING gin 
(to_tsvector('simple'::regconfig, (name)::text)); +-- +-- Name: store_apps_store_and_id_idx; Type: INDEX; Schema: public; Owner: james +-- + +CREATE INDEX store_apps_store_and_id_idx ON public.store_apps USING btree (store, id); + + -- -- Name: store_apps_store_id_idx; Type: INDEX; Schema: public; Owner: james -- @@ -6251,13 +6617,6 @@ CREATE TRIGGER app_urls_map_updated_at BEFORE UPDATE ON public.app_urls_map FOR CREATE TRIGGER developers_updated_at BEFORE UPDATE ON public.developers FOR EACH ROW EXECUTE FUNCTION public.update_modified_column(); --- --- Name: store_apps store_app_audit; Type: TRIGGER; Schema: public; Owner: james --- - -CREATE TRIGGER store_app_audit AFTER INSERT OR DELETE OR UPDATE ON public.store_apps FOR EACH ROW EXECUTE FUNCTION public.process_store_app_audit(); - - -- -- Name: store_apps store_apps_updated_at; Type: TRIGGER; Schema: public; Owner: james -- @@ -6312,6 +6671,22 @@ ALTER TABLE ONLY adtech.company_domain_mapping ADD CONSTRAINT company_domain_mapping_domain_id_fkey FOREIGN KEY (domain_id) REFERENCES public.domains(id) ON DELETE CASCADE; +-- +-- Name: company_mediation_adapters company_mediation_adapters_company_id_fkey; Type: FK CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.company_mediation_adapters + ADD CONSTRAINT company_mediation_adapters_company_id_fkey FOREIGN KEY (company_id) REFERENCES adtech.companies(id); + + +-- +-- Name: sdk_mediation_patterns company_mediation_patterns_company_id_fkey; Type: FK CONSTRAINT; Schema: adtech; Owner: postgres +-- + +ALTER TABLE ONLY adtech.sdk_mediation_patterns + ADD CONSTRAINT company_mediation_patterns_company_id_fkey FOREIGN KEY (sdk_id) REFERENCES adtech.sdks(id); + + -- -- Name: sdk_categories fk_category; Type: FK CONSTRAINT; Schema: adtech; Owner: postgres -- @@ -6417,11 +6792,11 @@ ALTER TABLE ONLY adtech.urls -- --- Name: store_app_ranks_weekly fk_country; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- Name: app_keyword_ranks_daily 
country_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres -- -ALTER TABLE ONLY frontend.store_app_ranks_weekly - ADD CONSTRAINT fk_country FOREIGN KEY (country) REFERENCES public.countries(id); +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT country_kr_fk FOREIGN KEY (country) REFERENCES public.countries(id); -- @@ -6433,11 +6808,11 @@ ALTER TABLE ONLY frontend.store_app_ranks_daily -- --- Name: store_app_ranks_weekly fk_store_app; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- Name: store_app_ranks_weekly fk_country; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres -- ALTER TABLE ONLY frontend.store_app_ranks_weekly - ADD CONSTRAINT fk_store_app FOREIGN KEY (store_app) REFERENCES public.store_apps(id) DEFERRABLE INITIALLY DEFERRED; + ADD CONSTRAINT fk_country FOREIGN KEY (country) REFERENCES public.countries(id); -- @@ -6449,11 +6824,11 @@ ALTER TABLE ONLY frontend.store_app_ranks_daily -- --- Name: store_app_ranks_weekly fk_store_category; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- Name: store_app_ranks_weekly fk_store_app; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres -- ALTER TABLE ONLY frontend.store_app_ranks_weekly - ADD CONSTRAINT fk_store_category FOREIGN KEY (store_category) REFERENCES public.store_categories(id); + ADD CONSTRAINT fk_store_app FOREIGN KEY (store_app) REFERENCES public.store_apps(id) DEFERRABLE INITIALLY DEFERRED; -- @@ -6465,11 +6840,11 @@ ALTER TABLE ONLY frontend.store_app_ranks_daily -- --- Name: store_app_ranks_weekly fk_store_collection; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- Name: store_app_ranks_weekly fk_store_category; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres -- ALTER TABLE ONLY frontend.store_app_ranks_weekly - ADD CONSTRAINT fk_store_collection FOREIGN KEY (store_collection) REFERENCES public.store_collections(id); + ADD CONSTRAINT fk_store_category FOREIGN KEY (store_category) REFERENCES public.store_categories(id); -- 
@@ -6480,6 +6855,38 @@ ALTER TABLE ONLY frontend.store_app_ranks_daily ADD CONSTRAINT fk_store_collection FOREIGN KEY (store_collection) REFERENCES public.store_collections(id); +-- +-- Name: store_app_ranks_weekly fk_store_collection; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.store_app_ranks_weekly + ADD CONSTRAINT fk_store_collection FOREIGN KEY (store_collection) REFERENCES public.store_collections(id); + + +-- +-- Name: app_keyword_ranks_daily keyword_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT keyword_kr_fk FOREIGN KEY (keyword_id) REFERENCES public.keywords(id); + + +-- +-- Name: app_keyword_ranks_daily store_app_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT store_app_kr_fk FOREIGN KEY (store_app) REFERENCES public.store_apps(id); + + +-- +-- Name: app_keyword_ranks_daily store_kr_fk; Type: FK CONSTRAINT; Schema: frontend; Owner: postgres +-- + +ALTER TABLE ONLY frontend.app_keyword_ranks_daily + ADD CONSTRAINT store_kr_fk FOREIGN KEY (store) REFERENCES public.stores(id); + + -- -- Name: app_country_crawls app_country_crawls_app_fk; Type: FK CONSTRAINT; Schema: logging; Owner: postgres -- @@ -6713,18 +7120,18 @@ ALTER TABLE ONLY public.creative_records -- --- Name: description_keywords description_keywords_description_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- Name: app_keywords_extracted description_keywords_app_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_description_id_fkey FOREIGN KEY (description_id) REFERENCES public.store_apps_descriptions(id) ON DELETE CASCADE; +ALTER TABLE ONLY public.app_keywords_extracted + ADD CONSTRAINT description_keywords_app_id_fkey FOREIGN KEY (store_app) REFERENCES 
public.store_apps(id) ON DELETE CASCADE; -- --- Name: description_keywords description_keywords_keyword_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- Name: app_keywords_extracted description_keywords_keyword_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- -ALTER TABLE ONLY public.description_keywords +ALTER TABLE ONLY public.app_keywords_extracted ADD CONSTRAINT description_keywords_keyword_id_fkey FOREIGN KEY (keyword_id) REFERENCES public.keywords(id) ON DELETE CASCADE; @@ -6736,22 +7143,6 @@ ALTER TABLE ONLY public.developers ADD CONSTRAINT developers_fk FOREIGN KEY (store) REFERENCES public.stores(id); --- --- Name: app_keyword_rankings fk_country; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_country FOREIGN KEY (country) REFERENCES public.countries(id); - - --- --- Name: ip_geo_snapshots fk_country; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.ip_geo_snapshots - ADD CONSTRAINT fk_country FOREIGN KEY (country_id) REFERENCES public.countries(id); - - -- -- Name: app_country_metrics_history fk_country; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- @@ -6761,27 +7152,11 @@ ALTER TABLE ONLY public.app_country_metrics_history -- --- Name: app_keyword_rankings fk_language; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_language FOREIGN KEY (lang) REFERENCES public.languages(id); - - --- --- Name: app_keyword_rankings fk_store_app; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_store_app FOREIGN KEY (store_app) REFERENCES public.store_apps(id); - - --- --- Name: app_keyword_rankings fk_store_keyword; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- Name: ip_geo_snapshots fk_country; Type: FK CONSTRAINT; Schema: public; Owner: postgres -- -ALTER 
TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_store_keyword FOREIGN KEY (keyword) REFERENCES public.keywords(id); +ALTER TABLE ONLY public.ip_geo_snapshots + ADD CONSTRAINT fk_country FOREIGN KEY (country_id) REFERENCES public.countries(id); -- @@ -6900,5 +7275,5 @@ GRANT ALL ON SCHEMA public TO PUBLIC; -- PostgreSQL database dump complete -- -\unrestrict vYe6z6bKpKdnNtZ6GXBJHvdhjuVYdAYwdbpsJfGPACUaXkI2Lmc94ZXYOPiUHhG +\unrestrict aqSyUPuJghUSivRpPmJ0wjpiuNqZ0Xb7iDHpQcCoga4fxvxnfEJs5GfAimQHJSS diff --git a/pg-ddl/schema/logging/app_country_crawls.sql b/pg-ddl/schema/logging/app_country_crawls.sql index 8a5b4e68..d711a4a5 100644 --- a/pg-ddl/schema/logging/app_country_crawls.sql +++ b/pg-ddl/schema/logging/app_country_crawls.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 1GS5X6ZRZi1My9z1uBZaxedq9LqCBbA2BAHKejOrKwbwN85xNgwSCQDNtUL2lZ9 +\restrict XLhCRL3eTUmBgoiSluev9YN7Yz6uAz7S0EcHyPTsOQxC8S8XtvMTA7JG6yikH6W --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -57,5 +57,5 @@ ALTER TABLE ONLY logging.app_country_crawls -- PostgreSQL database dump complete -- -\unrestrict 1GS5X6ZRZi1My9z1uBZaxedq9LqCBbA2BAHKejOrKwbwN85xNgwSCQDNtUL2lZ9 +\unrestrict XLhCRL3eTUmBgoiSluev9YN7Yz6uAz7S0EcHyPTsOQxC8S8XtvMTA7JG6yikH6W diff --git a/pg-ddl/schema/logging/app_description_keywords_extracted.sql b/pg-ddl/schema/logging/app_description_keywords_extracted.sql new file mode 100644 index 00000000..a78f94f2 --- /dev/null +++ b/pg-ddl/schema/logging/app_description_keywords_extracted.sql @@ -0,0 +1,59 @@ +-- +-- PostgreSQL database dump +-- + +\restrict qlCo85fAyeH65VKAU3DkTOVEZz8rub6PAeHaZHRs1yIYU8Xpdkdug4matYGI7Ed + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump 
version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: app_description_keywords_extracted; Type: TABLE; Schema: logging; Owner: postgres +-- + +CREATE TABLE logging.app_description_keywords_extracted ( + store_app integer NOT NULL, + description_id integer NOT NULL, + extracted_at timestamp without time zone +); + + +ALTER TABLE logging.app_description_keywords_extracted OWNER TO postgres; + +-- +-- Name: app_description_keywords_extracted app_description_keywords_extracted_pk; Type: CONSTRAINT; Schema: logging; Owner: postgres +-- + +ALTER TABLE ONLY logging.app_description_keywords_extracted + ADD CONSTRAINT app_description_keywords_extracted_pk PRIMARY KEY (store_app, description_id); + + +-- +-- Name: app_description_keywords_extrac_description_id_extracted_at_idx; Type: INDEX; Schema: logging; Owner: postgres +-- + +CREATE INDEX app_description_keywords_extrac_description_id_extracted_at_idx ON logging.app_description_keywords_extracted USING btree (description_id, extracted_at DESC); + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict qlCo85fAyeH65VKAU3DkTOVEZz8rub6PAeHaZHRs1yIYU8Xpdkdug4matYGI7Ed + diff --git a/pg-ddl/schema/logging/creative_scan_results.sql b/pg-ddl/schema/logging/creative_scan_results.sql index 58496851..2782fc5b 100644 --- a/pg-ddl/schema/logging/creative_scan_results.sql +++ b/pg-ddl/schema/logging/creative_scan_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict UYkE58wuYyJbULJQu2ddPydwSMoFAeUW5Lz7A5tN1lDhN3yNbnDQTBMdOPcMyg0 +\restrict 
lFTXpDYG6cL4SRMSInkdevzjeBNJXYkVHgnDMtZKgzp1TOnc3Pjx3xj6Uxw3rig --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -47,5 +47,5 @@ ALTER TABLE logging.creative_scan_results OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict UYkE58wuYyJbULJQu2ddPydwSMoFAeUW5Lz7A5tN1lDhN3yNbnDQTBMdOPcMyg0 +\unrestrict lFTXpDYG6cL4SRMSInkdevzjeBNJXYkVHgnDMtZKgzp1TOnc3Pjx3xj6Uxw3rig diff --git a/pg-ddl/schema/logging/developers_crawled_at.sql b/pg-ddl/schema/logging/developers_crawled_at.sql index 31be3b2c..d5f7cba8 100644 --- a/pg-ddl/schema/logging/developers_crawled_at.sql +++ b/pg-ddl/schema/logging/developers_crawled_at.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict mcbZRGhP4vHXmWZpOKe59nd7GfwyldH3DhMwAYHnZXzdGsorji8af6cHbnx7Pof +\restrict cVOjxwj29CPuf3wqYdAuaBk09lyLHqS6zqPepfs0aWzWzGo95ROxhE41ueL9Q0O --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -55,5 +55,5 @@ ALTER TABLE ONLY logging.developers_crawled_at -- PostgreSQL database dump complete -- -\unrestrict mcbZRGhP4vHXmWZpOKe59nd7GfwyldH3DhMwAYHnZXzdGsorji8af6cHbnx7Pof +\unrestrict cVOjxwj29CPuf3wqYdAuaBk09lyLHqS6zqPepfs0aWzWzGo95ROxhE41ueL9Q0O diff --git a/pg-ddl/schema/logging/keywords_crawled_at.sql b/pg-ddl/schema/logging/keywords_crawled_at.sql index e9fe13a9..20e396dc 100644 --- a/pg-ddl/schema/logging/keywords_crawled_at.sql +++ b/pg-ddl/schema/logging/keywords_crawled_at.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
UBKBKGAcbCAElviNM03RNAYoA0eWfrwaoB5QeWQbWeAYuFLC16EL4feVZYOhUdX +\restrict OCbdtYmW5qhfr6zfMStVyDmUiTm67b5fO9jEJxvHSWfMTr9Jwa9TTaLDoQJpeSo --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -55,5 +55,5 @@ ALTER TABLE ONLY logging.keywords_crawled_at -- PostgreSQL database dump complete -- -\unrestrict UBKBKGAcbCAElviNM03RNAYoA0eWfrwaoB5QeWQbWeAYuFLC16EL4feVZYOhUdX +\unrestrict OCbdtYmW5qhfr6zfMStVyDmUiTm67b5fO9jEJxvHSWfMTr9Jwa9TTaLDoQJpeSo diff --git a/pg-ddl/schema/logging/snapshot_pub_domains.sql b/pg-ddl/schema/logging/snapshot_pub_domains.sql index a1ca9671..aa2e79e8 100644 --- a/pg-ddl/schema/logging/snapshot_pub_domains.sql +++ b/pg-ddl/schema/logging/snapshot_pub_domains.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict CLhugwnvfL9oEc9BodlUDUWCvGUp0dCgej56soTMIwtCZ8eSYnCOcPSfVmkH17i +\restrict O0Yw0h1c9KD9hFxpARi1vKOMlqHMVHCoO8eIf9UYhWycrmUQ0HoTfkxEmNgEdqS --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -43,5 +43,5 @@ ALTER TABLE logging.snapshot_pub_domains OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict CLhugwnvfL9oEc9BodlUDUWCvGUp0dCgej56soTMIwtCZ8eSYnCOcPSfVmkH17i +\unrestrict O0Yw0h1c9KD9hFxpARi1vKOMlqHMVHCoO8eIf9UYhWycrmUQ0HoTfkxEmNgEdqS diff --git a/pg-ddl/schema/logging/store_app_downloads.sql b/pg-ddl/schema/logging/store_app_downloads.sql index e092562e..9d03de41 100644 --- a/pg-ddl/schema/logging/store_app_downloads.sql +++ b/pg-ddl/schema/logging/store_app_downloads.sql @@ -2,10 +2,10 @@ -- 
PostgreSQL database dump -- -\restrict wY2fjQXtWg8eoLskG407ZslzCJVGGBHCdRjqXqheeIj7OhQnFejW7DnkwMI2ggc +\restrict ksZKwIrbUnmFjhbhX9sXa8yHrhnrGJmwUqc2HxviknXYcBU72E4h1bZQrfsatSc --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -49,5 +49,5 @@ ALTER TABLE ONLY logging.store_app_downloads -- PostgreSQL database dump complete -- -\unrestrict wY2fjQXtWg8eoLskG407ZslzCJVGGBHCdRjqXqheeIj7OhQnFejW7DnkwMI2ggc +\unrestrict ksZKwIrbUnmFjhbhX9sXa8yHrhnrGJmwUqc2HxviknXYcBU72E4h1bZQrfsatSc diff --git a/pg-ddl/schema/logging/store_app_no_creatives.sql b/pg-ddl/schema/logging/store_app_no_creatives.sql index b84e3994..d9f6d54c 100644 --- a/pg-ddl/schema/logging/store_app_no_creatives.sql +++ b/pg-ddl/schema/logging/store_app_no_creatives.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict fvX6fc0Gg9RG3Orah7Q6PiPLpcHsiEI1eRCqIon2aJwETxtLkjDQ1r4CFqVAWKY +\restrict bsHE1eQqGSOVHFM4yj2syBtyp4lKtfMcHGwnZcrcocIEXPuPIgcTTrQ9HDH1Pbm --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -40,5 +40,5 @@ ALTER TABLE logging.store_app_no_creatives OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict fvX6fc0Gg9RG3Orah7Q6PiPLpcHsiEI1eRCqIon2aJwETxtLkjDQ1r4CFqVAWKY +\unrestrict bsHE1eQqGSOVHFM4yj2syBtyp4lKtfMcHGwnZcrcocIEXPuPIgcTTrQ9HDH1Pbm diff --git a/pg-ddl/schema/logging/store_app_sources.sql b/pg-ddl/schema/logging/store_app_sources.sql index ec5c662e..f8c27ad7 100644 --- a/pg-ddl/schema/logging/store_app_sources.sql +++ 
b/pg-ddl/schema/logging/store_app_sources.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict eTVomzTH9XuZC3k3Rn8AWNz9r2ASbO3b6BjEnvIcQOKJrmoxzW8Ct0722RYMCzi +\restrict 9VFmZh1jUM1Bqp34zb9tIV7byn3yGks9xJk4kc56EEZbV6VJlYbDePee6K7nNmn --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -64,5 +64,5 @@ ALTER TABLE ONLY logging.store_app_sources -- PostgreSQL database dump complete -- -\unrestrict eTVomzTH9XuZC3k3Rn8AWNz9r2ASbO3b6BjEnvIcQOKJrmoxzW8Ct0722RYMCzi +\unrestrict 9VFmZh1jUM1Bqp34zb9tIV7byn3yGks9xJk4kc56EEZbV6VJlYbDePee6K7nNmn diff --git a/pg-ddl/schema/logging/store_app_waydroid_crawled_at.sql b/pg-ddl/schema/logging/store_app_waydroid_crawled_at.sql index 98d86ce2..ee0580ab 100644 --- a/pg-ddl/schema/logging/store_app_waydroid_crawled_at.sql +++ b/pg-ddl/schema/logging/store_app_waydroid_crawled_at.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict yIYIk6xIxuSTfRTucujR1iv9qX22VMlOJurslawFnYPSRWL8UEYtrly1Jaxh0vc +\restrict GIsi4qmZys2qEEf0fHrlkQx7vPbnVDaygPdpyM0v4BrkxJBdtO7zVc8nqbaZ3Jt --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -63,5 +63,5 @@ ALTER TABLE ONLY logging.store_app_waydroid_crawled_at -- PostgreSQL database dump complete -- -\unrestrict yIYIk6xIxuSTfRTucujR1iv9qX22VMlOJurslawFnYPSRWL8UEYtrly1Jaxh0vc +\unrestrict GIsi4qmZys2qEEf0fHrlkQx7vPbnVDaygPdpyM0v4BrkxJBdtO7zVc8nqbaZ3Jt diff --git a/pg-ddl/schema/logging/store_apps_audit.sql b/pg-ddl/schema/logging/store_apps_audit.sql deleted file mode 100644 index 
ee5c0e80..00000000 --- a/pg-ddl/schema/logging/store_apps_audit.sql +++ /dev/null @@ -1,55 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict 7QO7A7FKXtsfEv8FXNSXcV5HNcX4dNmmRoAx6kaVf0kbm427Bj2mLvRHUnp8Yjq - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: store_apps_audit; Type: TABLE; Schema: logging; Owner: postgres --- - -CREATE TABLE logging.store_apps_audit ( - operation character(1) NOT NULL, - stamp timestamp without time zone NOT NULL, - userid text NOT NULL, - row_id bigint NOT NULL, - store smallint NOT NULL, - store_id text NOT NULL, - crawl_result integer -); - - -ALTER TABLE logging.store_apps_audit OWNER TO postgres; - --- --- Name: store_apps_audit_stamp_idx; Type: INDEX; Schema: logging; Owner: postgres --- - -CREATE INDEX store_apps_audit_stamp_idx ON logging.store_apps_audit USING btree (stamp); - - --- --- PostgreSQL database dump complete --- - -\unrestrict 7QO7A7FKXtsfEv8FXNSXcV5HNcX4dNmmRoAx6kaVf0kbm427Bj2mLvRHUnp8Yjq - diff --git a/pg-ddl/schema/logging/store_apps_snapshot.sql b/pg-ddl/schema/logging/store_apps_snapshot.sql index 377fcfbb..f8d636e8 100644 --- a/pg-ddl/schema/logging/store_apps_snapshot.sql +++ b/pg-ddl/schema/logging/store_apps_snapshot.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict qecG5RHvsy7qapOyRgCf4gMicRza78Kdic8UtVxKPsiSL53N6Kuh4ikRswZyDkT +\restrict qb2zs7efhEIrexfJb8qO6phvGb8LJdHM6EImnWvVfdhU3coOoHMxI2Hxvk5dGXd --- Dumped from database version 18.0 
(Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -44,5 +44,5 @@ ALTER TABLE logging.store_apps_snapshot OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict qecG5RHvsy7qapOyRgCf4gMicRza78Kdic8UtVxKPsiSL53N6Kuh4ikRswZyDkT +\unrestrict qb2zs7efhEIrexfJb8qO6phvGb8LJdHM6EImnWvVfdhU3coOoHMxI2Hxvk5dGXd diff --git a/pg-ddl/schema/logging/version_code_api_scan_results.sql b/pg-ddl/schema/logging/version_code_api_scan_results.sql index 11e95fac..0005804e 100644 --- a/pg-ddl/schema/logging/version_code_api_scan_results.sql +++ b/pg-ddl/schema/logging/version_code_api_scan_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict rsOBxkCgHnywyg1lAI7lhgdNB7bHBbyz2sdNgDeURVBAxf4cbzI9Mo90EzChtFH +\restrict khzeSQsnAWTuu1GA96D9a4acx9uL3yu8bnL8oNFJKMaLXXM0GpdDqJmh8wes9uk --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -42,5 +42,5 @@ ALTER TABLE logging.version_code_api_scan_results OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict rsOBxkCgHnywyg1lAI7lhgdNB7bHBbyz2sdNgDeURVBAxf4cbzI9Mo90EzChtFH +\unrestrict khzeSQsnAWTuu1GA96D9a4acx9uL3yu8bnL8oNFJKMaLXXM0GpdDqJmh8wes9uk diff --git a/pg-ddl/schema/public/ad_network_sdk_keys__matview.sql b/pg-ddl/schema/public/ad_network_sdk_keys__matview.sql index d064cb02..0d01a397 100644 --- a/pg-ddl/schema/public/ad_network_sdk_keys__matview.sql +++ b/pg-ddl/schema/public/ad_network_sdk_keys__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
cDUam4m9gNzcHaCB9hb4k9bekTmWZIUBR0t8OFP22ZLn7giKYBijeurYpBKAGb1 +\restrict cmYOrybzGuaHZhE3RknrLSJNJ8gJsbfiCYjMCqXbO4qFrQavKojkgCCZjza0C7M --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -60,5 +60,5 @@ ALTER MATERIALIZED VIEW public.ad_network_sdk_keys OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict cDUam4m9gNzcHaCB9hb4k9bekTmWZIUBR0t8OFP22ZLn7giKYBijeurYpBKAGb1 +\unrestrict cmYOrybzGuaHZhE3RknrLSJNJ8gJsbfiCYjMCqXbO4qFrQavKojkgCCZjza0C7M diff --git a/pg-ddl/schema/public/adstxt_crawl_results.sql b/pg-ddl/schema/public/adstxt_crawl_results.sql index 5e4854c6..0736dafd 100644 --- a/pg-ddl/schema/public/adstxt_crawl_results.sql +++ b/pg-ddl/schema/public/adstxt_crawl_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 8f83kbJqGwnmuxilRHDGIPBO14kZEHcMp656e8I8uACRVXh2Dh6tog4VdpN9Jo8 +\restrict jcAooXEnJQkc0T5rOitoFytHKwzmGmL9GoxQtT6myIAVYkR8oD9sQdjobPh5pB0 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -96,5 +96,5 @@ ALTER TABLE ONLY public.adstxt_crawl_results -- PostgreSQL database dump complete -- -\unrestrict 8f83kbJqGwnmuxilRHDGIPBO14kZEHcMp656e8I8uACRVXh2Dh6tog4VdpN9Jo8 +\unrestrict jcAooXEnJQkc0T5rOitoFytHKwzmGmL9GoxQtT6myIAVYkR8oD9sQdjobPh5pB0 diff --git a/pg-ddl/schema/public/api_calls.sql b/pg-ddl/schema/public/api_calls.sql index 454dc62b..6869c0fa 100644 --- a/pg-ddl/schema/public/api_calls.sql +++ b/pg-ddl/schema/public/api_calls.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
kr2eGl7acl5vcz7SysJycxiD6rUUsiXGj0UU5zv41zf66f5F4BSi8A0nCJ73mOd +\restrict saoMeu5Y0TkNkHzwVas2njG26f0XxtaKx23fvESiuYc454GPf8Y0ox67u50uSYc --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -135,5 +135,5 @@ ALTER TABLE ONLY public.api_calls -- PostgreSQL database dump complete -- -\unrestrict kr2eGl7acl5vcz7SysJycxiD6rUUsiXGj0UU5zv41zf66f5F4BSi8A0nCJ73mOd +\unrestrict saoMeu5Y0TkNkHzwVas2njG26f0XxtaKx23fvESiuYc454GPf8Y0ox67u50uSYc diff --git a/pg-ddl/schema/public/app_ads_entrys.sql b/pg-ddl/schema/public/app_ads_entrys.sql index 8cb1ac09..40ec259f 100644 --- a/pg-ddl/schema/public/app_ads_entrys.sql +++ b/pg-ddl/schema/public/app_ads_entrys.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict w6gBiDOTCPO8ZPETbh7tpy3cP2s51toxwYWO6teY8ieLeZu6UD5bGztKUhmgswX +\restrict vRVRDtIUaYXlGxoa5s8aSbemATRLvAtdRy6bePr0OSsF5fLrv4S5A32ESu34XrF --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -90,5 +90,5 @@ ALTER TABLE ONLY public.app_ads_entrys -- PostgreSQL database dump complete -- -\unrestrict w6gBiDOTCPO8ZPETbh7tpy3cP2s51toxwYWO6teY8ieLeZu6UD5bGztKUhmgswX +\unrestrict vRVRDtIUaYXlGxoa5s8aSbemATRLvAtdRy6bePr0OSsF5fLrv4S5A32ESu34XrF diff --git a/pg-ddl/schema/public/app_ads_map.sql b/pg-ddl/schema/public/app_ads_map.sql index ecd487fe..b78cc3cd 100644 --- a/pg-ddl/schema/public/app_ads_map.sql +++ b/pg-ddl/schema/public/app_ads_map.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
JcxT3ucH0tVhZCnICBI4zCDoAW2HeFFTPA7DtUZGpLaH5rZjnnmcRNWPnMGqLYs +\restrict 3nRm3C25LjvyOcnGbjsqSGdutLYiM5JeDnFMgd8gFqNc4x0qWSeM8k47Vog5GG6 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -95,5 +95,5 @@ ALTER TABLE ONLY public.app_ads_map -- PostgreSQL database dump complete -- -\unrestrict JcxT3ucH0tVhZCnICBI4zCDoAW2HeFFTPA7DtUZGpLaH5rZjnnmcRNWPnMGqLYs +\unrestrict 3nRm3C25LjvyOcnGbjsqSGdutLYiM5JeDnFMgd8gFqNc4x0qWSeM8k47Vog5GG6 diff --git a/pg-ddl/schema/public/app_country_metrics_history.sql b/pg-ddl/schema/public/app_country_metrics_history.sql index af706565..ac1dd50a 100644 --- a/pg-ddl/schema/public/app_country_metrics_history.sql +++ b/pg-ddl/schema/public/app_country_metrics_history.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict vxGV2twtbTpWC176QwyDYAMnLUNXKMhdzgFtEq0wjIoz0hkJpqX8FLE9ckY4T0e +\restrict ANh2lWa2FfRbz3rXRBv9rs0NO5YM7vEgUAZEIpezN7nM4peG00h2NwHwOxchhca --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -85,5 +85,5 @@ ALTER TABLE ONLY public.app_country_metrics_history -- PostgreSQL database dump complete -- -\unrestrict vxGV2twtbTpWC176QwyDYAMnLUNXKMhdzgFtEq0wjIoz0hkJpqX8FLE9ckY4T0e +\unrestrict ANh2lWa2FfRbz3rXRBv9rs0NO5YM7vEgUAZEIpezN7nM4peG00h2NwHwOxchhca diff --git a/pg-ddl/schema/public/app_country_metrics_latest__matview.sql b/pg-ddl/schema/public/app_country_metrics_latest__matview.sql index 51f5ef0d..5788feda 100644 --- a/pg-ddl/schema/public/app_country_metrics_latest__matview.sql +++ 
b/pg-ddl/schema/public/app_country_metrics_latest__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict OsaEeiR7j7L2a3Mvf5WpHcfnrMET1UHjY6jAuDFG1CM8tKXwMj8C0TJbjMi5zVk +\restrict j5QcL8IA3kWwqZtBFAHFg16mITE4quJPSb5a3NFJohzei4DwT8hN178VpTbgHyL --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -57,5 +57,5 @@ CREATE UNIQUE INDEX app_country_metrics_latest_idx ON public.app_country_metrics -- PostgreSQL database dump complete -- -\unrestrict OsaEeiR7j7L2a3Mvf5WpHcfnrMET1UHjY6jAuDFG1CM8tKXwMj8C0TJbjMi5zVk +\unrestrict j5QcL8IA3kWwqZtBFAHFg16mITE4quJPSb5a3NFJohzei4DwT8hN178VpTbgHyL diff --git a/pg-ddl/schema/public/app_global_metrics_history.sql b/pg-ddl/schema/public/app_global_metrics_history.sql index c8d3c2ba..5f74266a 100644 --- a/pg-ddl/schema/public/app_global_metrics_history.sql +++ b/pg-ddl/schema/public/app_global_metrics_history.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 1Aw0aYh7XCtkgFz04zsDAa0LjVS8UBl97GG4KM0JkYwH6AK7Lh29CaP3TOUXCN4 +\restrict nm2Ymju9xTwTbTT6qMpGHXk6lev3UGDSl4oIVVvjJ7vGDTb41BNJEXpOHOAQwxw --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -78,5 +78,5 @@ ALTER TABLE ONLY public.app_global_metrics_history -- PostgreSQL database dump complete -- -\unrestrict 1Aw0aYh7XCtkgFz04zsDAa0LjVS8UBl97GG4KM0JkYwH6AK7Lh29CaP3TOUXCN4 +\unrestrict nm2Ymju9xTwTbTT6qMpGHXk6lev3UGDSl4oIVVvjJ7vGDTb41BNJEXpOHOAQwxw diff --git a/pg-ddl/schema/public/app_global_metrics_latest__matview.sql 
b/pg-ddl/schema/public/app_global_metrics_latest__matview.sql index b3b28e5d..27ae7bc0 100644 --- a/pg-ddl/schema/public/app_global_metrics_latest__matview.sql +++ b/pg-ddl/schema/public/app_global_metrics_latest__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ey6Ywf83vSTVWS84v92RvFv15h7SCu0lDZWd7YNhSQpAGhUDr2c2eFhMhIt9RFy +\restrict YK4pATLC9BGj2r4Qd7Iu3lrhE1q6gOp8YKgacBQiLQXYea1cUhLIHEVywWeXRnj --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -58,5 +58,5 @@ CREATE UNIQUE INDEX app_global_metrics_latest_idx ON public.app_global_metrics_l -- PostgreSQL database dump complete -- -\unrestrict ey6Ywf83vSTVWS84v92RvFv15h7SCu0lDZWd7YNhSQpAGhUDr2c2eFhMhIt9RFy +\unrestrict YK4pATLC9BGj2r4Qd7Iu3lrhE1q6gOp8YKgacBQiLQXYea1cUhLIHEVywWeXRnj diff --git a/pg-ddl/schema/public/app_global_metrics_weekly_diffs__matview.sql b/pg-ddl/schema/public/app_global_metrics_weekly_diffs__matview.sql index 2fc743c9..c5544c1a 100644 --- a/pg-ddl/schema/public/app_global_metrics_weekly_diffs__matview.sql +++ b/pg-ddl/schema/public/app_global_metrics_weekly_diffs__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict Syr6lyxP1PlfUN6aJEjK7mIdtKYCHydvUmnrrt4BSmzsIHycKentN4CPT9mI8aZ +\restrict mDZ2ED3svgv0lvOGPwaUwJ3uCfrp9guFcUydUudcaupVviQujiyJ0OyppvXALct --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -62,5 +62,5 @@ CREATE UNIQUE INDEX app_global_metrics_weekly_diffs_week_start_store_app_idx ON -- PostgreSQL database dump complete -- 
-\unrestrict Syr6lyxP1PlfUN6aJEjK7mIdtKYCHydvUmnrrt4BSmzsIHycKentN4CPT9mI8aZ +\unrestrict mDZ2ED3svgv0lvOGPwaUwJ3uCfrp9guFcUydUudcaupVviQujiyJ0OyppvXALct diff --git a/pg-ddl/schema/public/app_keyword_rankings.sql b/pg-ddl/schema/public/app_keyword_rankings.sql deleted file mode 100644 index fff75b24..00000000 --- a/pg-ddl/schema/public/app_keyword_rankings.sql +++ /dev/null @@ -1,125 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict SNGBI54BSvVAR0OdVWplpuNGdp8xpPUm92U1BBhJJd1iFZJQWQO5j2wHtOonYXx - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: app_keyword_rankings; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE public.app_keyword_rankings ( - id integer NOT NULL, - crawled_date date NOT NULL, - country smallint NOT NULL, - lang smallint NOT NULL, - keyword integer NOT NULL, - rank smallint NOT NULL, - store_app integer NOT NULL -); - - -ALTER TABLE public.app_keyword_rankings OWNER TO postgres; - --- --- Name: app_keyword_rankings_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.app_keyword_rankings_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.app_keyword_rankings_id_seq OWNER TO postgres; - --- --- Name: app_keyword_rankings_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.app_keyword_rankings_id_seq OWNED BY public.app_keyword_rankings.id; - - 
--- --- Name: app_keyword_rankings id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings ALTER COLUMN id SET DEFAULT nextval('public.app_keyword_rankings_id_seq'::regclass); - - --- --- Name: app_keyword_rankings app_keyword_rankings_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT app_keyword_rankings_pkey PRIMARY KEY (id); - - --- --- Name: app_keyword_rankings unique_keyword_ranking; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT unique_keyword_ranking UNIQUE (crawled_date, country, lang, rank, store_app, keyword); - - --- --- Name: app_keyword_rankings fk_country; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_country FOREIGN KEY (country) REFERENCES public.countries(id); - - --- --- Name: app_keyword_rankings fk_language; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_language FOREIGN KEY (lang) REFERENCES public.languages(id); - - --- --- Name: app_keyword_rankings fk_store_app; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_store_app FOREIGN KEY (store_app) REFERENCES public.store_apps(id); - - --- --- Name: app_keyword_rankings fk_store_keyword; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.app_keyword_rankings - ADD CONSTRAINT fk_store_keyword FOREIGN KEY (keyword) REFERENCES public.keywords(id); - - --- --- PostgreSQL database dump complete --- - -\unrestrict SNGBI54BSvVAR0OdVWplpuNGdp8xpPUm92U1BBhJJd1iFZJQWQO5j2wHtOonYXx - diff --git a/pg-ddl/schema/public/app_keywords_extracted.sql b/pg-ddl/schema/public/app_keywords_extracted.sql new file mode 100644 index 00000000..da47b7c5 --- /dev/null +++ 
b/pg-ddl/schema/public/app_keywords_extracted.sql @@ -0,0 +1,83 @@ +-- +-- PostgreSQL database dump +-- + +\restrict 2kNgHkOe4Cc5zOpKsuSVbAOojljA0TFiI1tojFzxakpFvjE2NV7wI5T1c2AquTR + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: app_keywords_extracted; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.app_keywords_extracted ( + store_app integer NOT NULL, + keyword_id integer NOT NULL, + description_id integer NOT NULL, + extracted_at timestamp without time zone NOT NULL +); + + +ALTER TABLE public.app_keywords_extracted OWNER TO postgres; + +-- +-- Name: app_keywords_extracted description_keywords_app_id_keyword_id_key; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.app_keywords_extracted + ADD CONSTRAINT description_keywords_app_id_keyword_id_key UNIQUE (store_app, keyword_id); + + +-- +-- Name: ake_latest_idx; Type: INDEX; Schema: public; Owner: postgres +-- + +CREATE INDEX ake_latest_idx ON public.app_keywords_extracted USING btree (store_app, extracted_at DESC); + + +-- +-- Name: app_keywords_app_index; Type: INDEX; Schema: public; Owner: postgres +-- + +CREATE INDEX app_keywords_app_index ON public.app_keywords_extracted USING btree (store_app); + + +-- +-- Name: app_keywords_extracted description_keywords_app_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.app_keywords_extracted + ADD CONSTRAINT 
description_keywords_app_id_fkey FOREIGN KEY (store_app) REFERENCES public.store_apps(id) ON DELETE CASCADE; + + +-- +-- Name: app_keywords_extracted description_keywords_keyword_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.app_keywords_extracted + ADD CONSTRAINT description_keywords_keyword_id_fkey FOREIGN KEY (keyword_id) REFERENCES public.keywords(id) ON DELETE CASCADE; + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict 2kNgHkOe4Cc5zOpKsuSVbAOojljA0TFiI1tojFzxakpFvjE2NV7wI5T1c2AquTR + diff --git a/pg-ddl/schema/public/app_urls_map.sql b/pg-ddl/schema/public/app_urls_map.sql index 323e6f95..185ad2c4 100644 --- a/pg-ddl/schema/public/app_urls_map.sql +++ b/pg-ddl/schema/public/app_urls_map.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict bJOFTB0z0GSHNuc5OipcOB5ROFyZh5ncUmFskMBFS5lB8UkbngLPbYDPOWBdqoe +\restrict bzEPyaHctOoFfbYW5YZznwFYmYlliesLFhokDVB1wqF3vNoQ4ouq9ydbcgYVAkR --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -95,5 +95,5 @@ ALTER TABLE ONLY public.app_urls_map -- PostgreSQL database dump complete -- -\unrestrict bJOFTB0z0GSHNuc5OipcOB5ROFyZh5ncUmFskMBFS5lB8UkbngLPbYDPOWBdqoe +\unrestrict bzEPyaHctOoFfbYW5YZznwFYmYlliesLFhokDVB1wqF3vNoQ4ouq9ydbcgYVAkR diff --git a/pg-ddl/schema/public/audit_dates__matview.sql b/pg-ddl/schema/public/audit_dates__matview.sql deleted file mode 100644 index 5e06366b..00000000 --- a/pg-ddl/schema/public/audit_dates__matview.sql +++ /dev/null @@ -1,59 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict Du8LobT4GNc4oWZ4FUNP9p22ghKeE9Xn3bZwZADSSptVopVRIPeQeydGXC1Mask - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 
18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: audit_dates; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.audit_dates AS - WITH sa AS ( - SELECT (store_apps_audit.stamp)::date AS updated_date, - 'store_apps'::text AS table_name, - count(*) AS updated_count - FROM logging.store_apps_audit - GROUP BY ((store_apps_audit.stamp)::date) - ) - SELECT updated_date, - table_name, - updated_count - FROM sa - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.audit_dates OWNER TO postgres; - --- --- Name: audit_dates_updated_date_idx; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE UNIQUE INDEX audit_dates_updated_date_idx ON public.audit_dates USING btree (updated_date, table_name); - - --- --- PostgreSQL database dump complete --- - -\unrestrict Du8LobT4GNc4oWZ4FUNP9p22ghKeE9Xn3bZwZADSSptVopVRIPeQeydGXC1Mask - diff --git a/pg-ddl/schema/public/category_mapping__matview.sql b/pg-ddl/schema/public/category_mapping__matview.sql index f3d62bf8..76158859 100644 --- a/pg-ddl/schema/public/category_mapping__matview.sql +++ b/pg-ddl/schema/public/category_mapping__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 15Fme94QDfvdOAQsirbuhpVZRfIXzNUqrTYZyCTX6qNI25KEx256OCtbatynNUK +\restrict WExrMK6UFYIpqtVe0dRMeLxCd8T2BTlgp99IBzZg7eZrShNZvhxsfaRs9IlB1PG --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump 
version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -67,5 +67,5 @@ CREATE UNIQUE INDEX category_mapping_idx ON public.category_mapping USING btree -- PostgreSQL database dump complete -- -\unrestrict 15Fme94QDfvdOAQsirbuhpVZRfIXzNUqrTYZyCTX6qNI25KEx256OCtbatynNUK +\unrestrict WExrMK6UFYIpqtVe0dRMeLxCd8T2BTlgp99IBzZg7eZrShNZvhxsfaRs9IlB1PG diff --git a/pg-ddl/schema/public/countries.sql b/pg-ddl/schema/public/countries.sql index 3a8b86ca..3538b1f5 100644 --- a/pg-ddl/schema/public/countries.sql +++ b/pg-ddl/schema/public/countries.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ndhaj1HVcXLcGvyaML686KiT3rLvRiJ4xTjXFhNJV3n3FvxEzAvPeNJvX2ZJp5x +\restrict LFwZdqDfg6GRjKe2GcSx8Y0UizEGvOHy0msSAJcLwZ9rdlcJVPK47bzMF5xXpqQ --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -71,5 +71,5 @@ ALTER TABLE ONLY public.countries -- PostgreSQL database dump complete -- -\unrestrict ndhaj1HVcXLcGvyaML686KiT3rLvRiJ4xTjXFhNJV3n3FvxEzAvPeNJvX2ZJp5x +\unrestrict LFwZdqDfg6GRjKe2GcSx8Y0UizEGvOHy0msSAJcLwZ9rdlcJVPK47bzMF5xXpqQ diff --git a/pg-ddl/schema/public/crawl_results.sql b/pg-ddl/schema/public/crawl_results.sql index 79f5d973..fdb41a22 100644 --- a/pg-ddl/schema/public/crawl_results.sql +++ b/pg-ddl/schema/public/crawl_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict MrVmeWCGBx5diZAJGxdlqYYaba8T0OyhFtxIwNOnbgtyy8qGOLLXWItFdNLjJAU +\restrict U6TvCc99jF2eF8T2Cp0evS9MQUeIYqKs8SzHZzPH5plI5XSqSDIVc4EbTMiXOeN --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 
18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -61,5 +61,5 @@ ALTER TABLE ONLY public.crawl_results -- PostgreSQL database dump complete -- -\unrestrict MrVmeWCGBx5diZAJGxdlqYYaba8T0OyhFtxIwNOnbgtyy8qGOLLXWItFdNLjJAU +\unrestrict U6TvCc99jF2eF8T2Cp0evS9MQUeIYqKs8SzHZzPH5plI5XSqSDIVc4EbTMiXOeN diff --git a/pg-ddl/schema/public/crawl_scenario_country_config.sql b/pg-ddl/schema/public/crawl_scenario_country_config.sql index ea382318..0feddb09 100644 --- a/pg-ddl/schema/public/crawl_scenario_country_config.sql +++ b/pg-ddl/schema/public/crawl_scenario_country_config.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 21pqut2sc0AD76gqcLjKLVKITMoB9MhDjsyMq2YI0J68S584wvD8Zlf6aeGY8OZ +\restrict i59UmFsuQCs2tysMaN1Ow2YrTS1oSeSgabfeUP26z8iubLg2J5grhz6C5jMaiiV --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -112,5 +112,5 @@ ALTER TABLE ONLY public.crawl_scenario_country_config -- PostgreSQL database dump complete -- -\unrestrict 21pqut2sc0AD76gqcLjKLVKITMoB9MhDjsyMq2YI0J68S584wvD8Zlf6aeGY8OZ +\unrestrict i59UmFsuQCs2tysMaN1Ow2YrTS1oSeSgabfeUP26z8iubLg2J5grhz6C5jMaiiV diff --git a/pg-ddl/schema/public/crawl_scenarios.sql b/pg-ddl/schema/public/crawl_scenarios.sql index b567f9e7..505636c7 100644 --- a/pg-ddl/schema/public/crawl_scenarios.sql +++ b/pg-ddl/schema/public/crawl_scenarios.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict NaJrPQdbzJGCQzxmSjDqyBt7do5wYKEPtCcgnRrvbqOXaEZCtHcHqCMguata4s0 +\restrict Itk79obb0ZaxGFj72tSvnCCxscB2hj2KTiStDo2SmKa0XZh2Meb3lD0XjejUcfQ --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped 
by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -86,5 +86,5 @@ ALTER TABLE ONLY public.crawl_scenarios -- PostgreSQL database dump complete -- -\unrestrict NaJrPQdbzJGCQzxmSjDqyBt7do5wYKEPtCcgnRrvbqOXaEZCtHcHqCMguata4s0 +\unrestrict Itk79obb0ZaxGFj72tSvnCCxscB2hj2KTiStDo2SmKa0XZh2Meb3lD0XjejUcfQ diff --git a/pg-ddl/schema/public/creative_assets.sql b/pg-ddl/schema/public/creative_assets.sql index 18f528cb..0087f2e0 100644 --- a/pg-ddl/schema/public/creative_assets.sql +++ b/pg-ddl/schema/public/creative_assets.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict eaurBbWOC1gZQgrNaZROgMSs2YLSoVX8yag8qt1aGrD6HMfSj3OyHHJRpgQ9pud +\restrict EJmNRXMOYGkSuG96SvsciQc1tXEwaZHnM8WYiUwMsVFSe9t3f5oYHfdccLcwDKf --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -94,5 +94,5 @@ CREATE INDEX idx_creative_assets_phash ON public.creative_assets USING btree (ph -- PostgreSQL database dump complete -- -\unrestrict eaurBbWOC1gZQgrNaZROgMSs2YLSoVX8yag8qt1aGrD6HMfSj3OyHHJRpgQ9pud +\unrestrict EJmNRXMOYGkSuG96SvsciQc1tXEwaZHnM8WYiUwMsVFSe9t3f5oYHfdccLcwDKf diff --git a/pg-ddl/schema/public/creative_records.sql b/pg-ddl/schema/public/creative_records.sql index cd278f53..b0dd44b9 100644 --- a/pg-ddl/schema/public/creative_records.sql +++ b/pg-ddl/schema/public/creative_records.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict IdwAQS9e8up1ZydKhUjehmeVxBgbAc1P62LEjyU99PRhjk9ukz1eg0jkjgaAKoR +\restrict 7CbQsFDCtCuP8e8vaeTQPYLdaCbGXrxYDoEre209MeWKz7QtzUOQMZP6wevqxdi --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- 
Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -40,6 +40,8 @@ CREATE TABLE public.creative_records ( additional_ad_domain_ids integer[], created_at timestamp with time zone DEFAULT timezone('utc'::text, now()) NOT NULL, updated_at timestamp with time zone DEFAULT timezone('utc'::text, now()) NOT NULL, + click_ids integer[], + click_url_ids integer[], CONSTRAINT check_advertiser_or_advertiser_domain CHECK (((advertiser_store_app_id IS NOT NULL) OR (advertiser_domain_id IS NOT NULL) OR ((advertiser_store_app_id IS NULL) AND (advertiser_domain_id IS NULL)))) ); @@ -151,5 +153,5 @@ ALTER TABLE ONLY public.creative_records -- PostgreSQL database dump complete -- -\unrestrict IdwAQS9e8up1ZydKhUjehmeVxBgbAc1P62LEjyU99PRhjk9ukz1eg0jkjgaAKoR +\unrestrict 7CbQsFDCtCuP8e8vaeTQPYLdaCbGXrxYDoEre209MeWKz7QtzUOQMZP6wevqxdi diff --git a/pg-ddl/schema/public/description_keywords.sql b/pg-ddl/schema/public/description_keywords.sql deleted file mode 100644 index 4d5ace30..00000000 --- a/pg-ddl/schema/public/description_keywords.sql +++ /dev/null @@ -1,106 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict ojHZNx8zTR0q0EPSiuWrdI2ts68hNtQjFAuIa7gZhXbC2LthlXyveNpzRPJIrm4 - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: description_keywords; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE public.description_keywords ( - id integer NOT NULL, - description_id 
integer NOT NULL, - keyword_id integer NOT NULL, - extracted_at timestamp without time zone DEFAULT now() NOT NULL -); - - -ALTER TABLE public.description_keywords OWNER TO postgres; - --- --- Name: description_keywords_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.description_keywords_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.description_keywords_id_seq OWNER TO postgres; - --- --- Name: description_keywords_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.description_keywords_id_seq OWNED BY public.description_keywords.id; - - --- --- Name: description_keywords id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords ALTER COLUMN id SET DEFAULT nextval('public.description_keywords_id_seq'::regclass); - - --- --- Name: description_keywords description_keywords_description_id_keyword_id_key; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_description_id_keyword_id_key UNIQUE (description_id, keyword_id); - - --- --- Name: description_keywords description_keywords_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_pkey PRIMARY KEY (id); - - --- --- Name: description_keywords description_keywords_description_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT description_keywords_description_id_fkey FOREIGN KEY (description_id) REFERENCES public.store_apps_descriptions(id) ON DELETE CASCADE; - - --- --- Name: description_keywords description_keywords_keyword_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.description_keywords - ADD CONSTRAINT 
description_keywords_keyword_id_fkey FOREIGN KEY (keyword_id) REFERENCES public.keywords(id) ON DELETE CASCADE; - - --- --- PostgreSQL database dump complete --- - -\unrestrict ojHZNx8zTR0q0EPSiuWrdI2ts68hNtQjFAuIa7gZhXbC2LthlXyveNpzRPJIrm4 - diff --git a/pg-ddl/schema/public/developer_store_apps__matview.sql b/pg-ddl/schema/public/developer_store_apps__matview.sql index 200f92c4..2dff8858 100644 --- a/pg-ddl/schema/public/developer_store_apps__matview.sql +++ b/pg-ddl/schema/public/developer_store_apps__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict ktyHtxXW2zqnXoVrohSBA1G8pgbanDnB2VjUDrck4NeNWK0oxxzado8OSQNfy5p +\restrict 2BBfJY8LdT9ootMyP3qTLaLhUcqb2PwQv9yJOBL2EipzqlCykq8RaVH0h1h9hUo --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -83,5 +83,5 @@ CREATE UNIQUE INDEX idx_developer_store_apps_unique ON public.developer_store_ap -- PostgreSQL database dump complete -- -\unrestrict ktyHtxXW2zqnXoVrohSBA1G8pgbanDnB2VjUDrck4NeNWK0oxxzado8OSQNfy5p +\unrestrict 2BBfJY8LdT9ootMyP3qTLaLhUcqb2PwQv9yJOBL2EipzqlCykq8RaVH0h1h9hUo diff --git a/pg-ddl/schema/public/developers.sql b/pg-ddl/schema/public/developers.sql index e450a216..f78e336e 100644 --- a/pg-ddl/schema/public/developers.sql +++ b/pg-ddl/schema/public/developers.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict w0CaObwYq8W5T7sdfm90OfnoaLbMWvYanwi1rVlLhPcxvFhWgnn7rdHnY1jyxeX +\restrict wRRllJR0wvRp5JquQH5c9JhWkaHlZ1ekgaeEXtfvhOZHPmU0mx7sUmrO86L0JSI --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET 
statement_timeout = 0; SET lock_timeout = 0; @@ -110,5 +110,5 @@ ALTER TABLE ONLY public.developers -- PostgreSQL database dump complete -- -\unrestrict w0CaObwYq8W5T7sdfm90OfnoaLbMWvYanwi1rVlLhPcxvFhWgnn7rdHnY1jyxeX +\unrestrict wRRllJR0wvRp5JquQH5c9JhWkaHlZ1ekgaeEXtfvhOZHPmU0mx7sUmrO86L0JSI diff --git a/pg-ddl/schema/public/domains.sql b/pg-ddl/schema/public/domains.sql index 8eadc980..722c7837 100644 --- a/pg-ddl/schema/public/domains.sql +++ b/pg-ddl/schema/public/domains.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict KcrUAenCecgrhny9MUUeVx2K5ziKYhZZb5bOUZUTrQslzKshWiZi1kUCGE9n15n +\restrict wOjrrCZitMYk6shcm5kppPtKhS4tmlGZZcmSGLPakUt7mhPcA90ebzSH6awqGBD --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -85,5 +85,5 @@ ALTER TABLE ONLY public.domains -- PostgreSQL database dump complete -- -\unrestrict KcrUAenCecgrhny9MUUeVx2K5ziKYhZZb5bOUZUTrQslzKshWiZi1kUCGE9n15n +\unrestrict wOjrrCZitMYk6shcm5kppPtKhS4tmlGZZcmSGLPakUt7mhPcA90ebzSH6awqGBD diff --git a/pg-ddl/schema/public/ip_geo_snapshots.sql b/pg-ddl/schema/public/ip_geo_snapshots.sql index 393671ec..695c0517 100644 --- a/pg-ddl/schema/public/ip_geo_snapshots.sql +++ b/pg-ddl/schema/public/ip_geo_snapshots.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict LsXFnGfMehdR6jnwzVVfscbKiBE3w3KSqO6rauOizFVaK5ZdEJMcdqKws21V9w9 +\restrict F1qIU60Zq8AlbLwAlIQr6RebjlfLTCynJF52utiTwPctcq6yrHwpfI32W6z2OdN --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -112,5 +112,5 @@ ALTER TABLE 
ONLY public.ip_geo_snapshots -- PostgreSQL database dump complete -- -\unrestrict LsXFnGfMehdR6jnwzVVfscbKiBE3w3KSqO6rauOizFVaK5ZdEJMcdqKws21V9w9 +\unrestrict F1qIU60Zq8AlbLwAlIQr6RebjlfLTCynJF52utiTwPctcq6yrHwpfI32W6z2OdN diff --git a/pg-ddl/schema/public/keywords.sql b/pg-ddl/schema/public/keywords.sql index f861675d..17f15276 100644 --- a/pg-ddl/schema/public/keywords.sql +++ b/pg-ddl/schema/public/keywords.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict KyGZGdd4Hf9KvXzvat2oHbIaEsuGxDElexd1USPo9HFgtcJXn6Mqlhmu7wvhcOM +\restrict 2zylZwg5BQghQ5H6uQt1s47utupYFzdGok1OACfODZ6MOeSylC0u35hVKleboGX --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -84,5 +84,5 @@ ALTER TABLE ONLY public.keywords -- PostgreSQL database dump complete -- -\unrestrict KyGZGdd4Hf9KvXzvat2oHbIaEsuGxDElexd1USPo9HFgtcJXn6Mqlhmu7wvhcOM +\unrestrict 2zylZwg5BQghQ5H6uQt1s47utupYFzdGok1OACfODZ6MOeSylC0u35hVKleboGX diff --git a/pg-ddl/schema/public/keywords_base.sql b/pg-ddl/schema/public/keywords_base.sql index 6d41f69f..5a143982 100644 --- a/pg-ddl/schema/public/keywords_base.sql +++ b/pg-ddl/schema/public/keywords_base.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict JDwNpNHaOsP6gpSqGwhod1VWu9PHgZ08u2FzuXlQfboyGsucZUvyn7nDfEemlww +\restrict eF9CzstVnV1BmKfqApJJwiEcKPGvZ1SQA3Viw37HbaRehLK2dd3hHk5iCGpJCc0 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -38,5 +38,5 @@ ALTER TABLE public.keywords_base OWNER TO postgres; -- PostgreSQL database dump complete -- 
-\unrestrict JDwNpNHaOsP6gpSqGwhod1VWu9PHgZ08u2FzuXlQfboyGsucZUvyn7nDfEemlww +\unrestrict eF9CzstVnV1BmKfqApJJwiEcKPGvZ1SQA3Viw37HbaRehLK2dd3hHk5iCGpJCc0 diff --git a/pg-ddl/schema/public/languages.sql b/pg-ddl/schema/public/languages.sql index fea97a38..4596a527 100644 --- a/pg-ddl/schema/public/languages.sql +++ b/pg-ddl/schema/public/languages.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict r3wLFoiUA0wG4OFVdYrLsX7ITQVPkxKypcnxVBFk1g1UiEUYbcFydR3Iv4eqFty +\restrict QIABSwxg8yaZzbGp9vb7BcHVHjU0YAbXIc2rEKsvtPBrifFE9SvXJX8O6DXTtrO --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -84,5 +84,5 @@ ALTER TABLE ONLY public.languages -- PostgreSQL database dump complete -- -\unrestrict r3wLFoiUA0wG4OFVdYrLsX7ITQVPkxKypcnxVBFk1g1UiEUYbcFydR3Iv4eqFty +\unrestrict QIABSwxg8yaZzbGp9vb7BcHVHjU0YAbXIc2rEKsvtPBrifFE9SvXJX8O6DXTtrO diff --git a/pg-ddl/schema/public/mv_app_categories__matview.sql b/pg-ddl/schema/public/mv_app_categories__matview.sql index ef90599f..507401dd 100644 --- a/pg-ddl/schema/public/mv_app_categories__matview.sql +++ b/pg-ddl/schema/public/mv_app_categories__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict oM0Vf0mD1LB5K8UjhrDx5nISV8aVbSUbb5VYpgYzkVsEc0Fc1CjgKXDO3ZhsevB +\restrict auwbx07U10faxdmK290GMUSAo46vh1agX8I4wYBkTGTtXqCEbsrLgsPmkkommj0 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -45,5 +45,5 @@ ALTER MATERIALIZED VIEW public.mv_app_categories OWNER TO postgres; -- PostgreSQL database dump 
complete -- -\unrestrict oM0Vf0mD1LB5K8UjhrDx5nISV8aVbSUbb5VYpgYzkVsEc0Fc1CjgKXDO3ZhsevB +\unrestrict auwbx07U10faxdmK290GMUSAo46vh1agX8I4wYBkTGTtXqCEbsrLgsPmkkommj0 diff --git a/pg-ddl/schema/public/pg_stat_statements__view.sql b/pg-ddl/schema/public/pg_stat_statements__view.sql index 536ad99b..a256a47f 100644 --- a/pg-ddl/schema/public/pg_stat_statements__view.sql +++ b/pg-ddl/schema/public/pg_stat_statements__view.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict gTJx4g4XTJGyB9SkP5ANpSgarTXiPuHHegc4ZWHiuVy36gnVXLG01nWMUUmfo0M +\restrict pgCevHIkfzk6cYDORIVWlrI15dEGbSS6NFfOOszW071YZYe024qhyfVGzDk3bZO --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -23,5 +23,5 @@ SET row_security = off; -- PostgreSQL database dump complete -- -\unrestrict gTJx4g4XTJGyB9SkP5ANpSgarTXiPuHHegc4ZWHiuVy36gnVXLG01nWMUUmfo0M +\unrestrict pgCevHIkfzk6cYDORIVWlrI15dEGbSS6NFfOOszW071YZYe024qhyfVGzDk3bZO diff --git a/pg-ddl/schema/public/pg_stat_statements_info__view.sql b/pg-ddl/schema/public/pg_stat_statements_info__view.sql index c913f24e..c6c16cfe 100644 --- a/pg-ddl/schema/public/pg_stat_statements_info__view.sql +++ b/pg-ddl/schema/public/pg_stat_statements_info__view.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict JBtNae68HBfe5QCf3QUXKbp74CiFqoGDX4JnnRn8MT5vePz60oOyTsb27Py47Qh +\restrict 40Qt4JBMshH5CU3ztXMINqc7Vj6RfxBrcdUPYBKbYPzzLVlygeVduggxeAE0COo --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -23,5 +23,5 @@ SET row_security = off; -- 
PostgreSQL database dump complete -- -\unrestrict JBtNae68HBfe5QCf3QUXKbp74CiFqoGDX4JnnRn8MT5vePz60oOyTsb27Py47Qh +\unrestrict 40Qt4JBMshH5CU3ztXMINqc7Vj6RfxBrcdUPYBKbYPzzLVlygeVduggxeAE0COo diff --git a/pg-ddl/schema/public/platforms.sql b/pg-ddl/schema/public/platforms.sql index 9c2be295..2e5fbe44 100644 --- a/pg-ddl/schema/public/platforms.sql +++ b/pg-ddl/schema/public/platforms.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict aERR5MS4qHryypMEnBCyuTbrkUXeetAMvNkmDNAQcowfNTBpDRbccH9x8sWgBJM +\restrict Ir4klirIuP4alY1VLoNGB7Gb8bLhwxpATzSOZLdcH8NFn2WgxxB7gImhBxGZDYS --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -84,5 +84,5 @@ ALTER TABLE ONLY public.platforms -- PostgreSQL database dump complete -- -\unrestrict aERR5MS4qHryypMEnBCyuTbrkUXeetAMvNkmDNAQcowfNTBpDRbccH9x8sWgBJM +\unrestrict Ir4klirIuP4alY1VLoNGB7Gb8bLhwxpATzSOZLdcH8NFn2WgxxB7gImhBxGZDYS diff --git a/pg-ddl/schema/public/retention_global_benchmarks.sql b/pg-ddl/schema/public/retention_global_benchmarks.sql new file mode 100644 index 00000000..da3af860 --- /dev/null +++ b/pg-ddl/schema/public/retention_global_benchmarks.sql @@ -0,0 +1,58 @@ +-- +-- PostgreSQL database dump +-- + +\restrict ZzbmOlxhPhLVUWDYI4sQ9mOAKsGLkJ3PEtJskHKE7wlaivPpClx0Cn2WGebJ9zH + +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; 
+SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: global_retention_benchmarks; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.global_retention_benchmarks ( + store_id smallint NOT NULL, + app_category text NOT NULL, + d1 numeric(6,5) NOT NULL, + d7 numeric(6,5) NOT NULL, + d30 numeric(6,5) NOT NULL, + CONSTRAINT global_retention_benchmarks_d1_check CHECK (((d1 > (0)::numeric) AND (d1 <= (1)::numeric))), + CONSTRAINT global_retention_benchmarks_d30_check CHECK (((d30 > (0)::numeric) AND (d30 <= (1)::numeric))), + CONSTRAINT global_retention_benchmarks_d7_check CHECK (((d7 > (0)::numeric) AND (d7 <= (1)::numeric))), + CONSTRAINT retention_monotonic_check CHECK (((d1 >= d7) AND (d7 >= d30))) +); + + +ALTER TABLE public.global_retention_benchmarks OWNER TO postgres; + +-- +-- Name: global_retention_benchmarks global_retention_benchmarks_pk; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.global_retention_benchmarks + ADD CONSTRAINT global_retention_benchmarks_pk PRIMARY KEY (store_id, app_category); + + +-- +-- PostgreSQL database dump complete +-- + +\unrestrict ZzbmOlxhPhLVUWDYI4sQ9mOAKsGLkJ3PEtJskHKE7wlaivPpClx0Cn2WGebJ9zH + diff --git a/pg-ddl/schema/public/store_app_z_scores__matview.sql b/pg-ddl/schema/public/store_app_z_scores__matview.sql index c86d29c4..e052d084 100644 --- a/pg-ddl/schema/public/store_app_z_scores__matview.sql +++ b/pg-ddl/schema/public/store_app_z_scores__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict p9qmlMALh6O4oKGcBd0GgsIPldbzvghMXUZ0m223MTYFbJ8cyHElwy99EaV5TTI +\restrict bwYenLceSY5m7fzsDHKjuOLSnfAj2APAot2hhpl8jiTfBOeDM3vFzqqSF5hljqr --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET 
statement_timeout = 0; SET lock_timeout = 0; @@ -155,5 +155,5 @@ ALTER MATERIALIZED VIEW public.store_app_z_scores OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict p9qmlMALh6O4oKGcBd0GgsIPldbzvghMXUZ0m223MTYFbJ8cyHElwy99EaV5TTI +\unrestrict bwYenLceSY5m7fzsDHKjuOLSnfAj2APAot2hhpl8jiTfBOeDM3vFzqqSF5hljqr diff --git a/pg-ddl/schema/public/store_app_z_scores_history.sql b/pg-ddl/schema/public/store_app_z_scores_history.sql index da169c7a..ce77396a 100644 --- a/pg-ddl/schema/public/store_app_z_scores_history.sql +++ b/pg-ddl/schema/public/store_app_z_scores_history.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 1ddIhUFPy5wlXfZ0khUoMTQ5zl3MCLoRLwquIhlLVlMUjiql9qED521oFYMx3IY +\restrict n0kb0s5eB3JVkCR7is6afQvd7D3JnII0QYv23yCJfEWVge27MVt617pUcKzQZVe --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -64,5 +64,5 @@ ALTER TABLE ONLY public.store_app_z_scores_history -- PostgreSQL database dump complete -- -\unrestrict 1ddIhUFPy5wlXfZ0khUoMTQ5zl3MCLoRLwquIhlLVlMUjiql9qED521oFYMx3IY +\unrestrict n0kb0s5eB3JVkCR7is6afQvd7D3JnII0QYv23yCJfEWVge27MVt617pUcKzQZVe diff --git a/pg-ddl/schema/public/store_apps.sql b/pg-ddl/schema/public/store_apps.sql index e7c865db..bcca5e47 100644 --- a/pg-ddl/schema/public/store_apps.sql +++ b/pg-ddl/schema/public/store_apps.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict JKp5YQaGRP9rXUuZrZaUKyOwDay6IgioWAg6XspDYhJUzSzXrlxdPkxmIu7jXkL +\restrict mXVNVSypIWD1JBE2q0bGg36u4zj9w14Aa6lAx8fDEPxicKnWjwvGixXq4Js16Ze --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 
(Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -107,6 +107,13 @@ CREATE INDEX store_apps_developer_idx ON public.store_apps USING btree (develope CREATE INDEX store_apps_name_idx ON public.store_apps USING gin (to_tsvector('simple'::regconfig, (name)::text)); +-- +-- Name: store_apps_store_and_id_idx; Type: INDEX; Schema: public; Owner: james +-- + +CREATE INDEX store_apps_store_and_id_idx ON public.store_apps USING btree (store, id); + + -- -- Name: store_apps_store_id_idx; Type: INDEX; Schema: public; Owner: james -- @@ -128,13 +135,6 @@ CREATE INDEX store_apps_updated_at_idx ON public.store_apps USING btree (updated CREATE INDEX textsearch_generated_idx ON public.store_apps USING gin (textsearchable_index_col); --- --- Name: store_apps store_app_audit; Type: TRIGGER; Schema: public; Owner: james --- - -CREATE TRIGGER store_app_audit AFTER INSERT OR DELETE OR UPDATE ON public.store_apps FOR EACH ROW EXECUTE FUNCTION public.process_store_app_audit(); - - -- -- Name: store_apps store_apps_updated_at; Type: TRIGGER; Schema: public; Owner: james -- @@ -170,5 +170,5 @@ ALTER TABLE ONLY public.store_apps -- PostgreSQL database dump complete -- -\unrestrict JKp5YQaGRP9rXUuZrZaUKyOwDay6IgioWAg6XspDYhJUzSzXrlxdPkxmIu7jXkL +\unrestrict mXVNVSypIWD1JBE2q0bGg36u4zj9w14Aa6lAx8fDEPxicKnWjwvGixXq4Js16Ze diff --git a/pg-ddl/schema/public/store_apps_created_at__matview.sql b/pg-ddl/schema/public/store_apps_created_at__matview.sql deleted file mode 100644 index fefc879d..00000000 --- a/pg-ddl/schema/public/store_apps_created_at__matview.sql +++ /dev/null @@ -1,75 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict 8DVuVuAYtUYq1dvTBsxG7ADYZrSCRMaBcfGAhOUQ8oLAIyGO8piqmubmAb9zkFd - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET 
client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: store_apps_created_at; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.store_apps_created_at AS - WITH my_dates AS ( - SELECT num_series.store, - (generate_series((CURRENT_DATE - '365 days'::interval), (CURRENT_DATE)::timestamp without time zone, '1 day'::interval))::date AS date - FROM generate_series(1, 2, 1) num_series(store) - ), created_dates AS ( - SELECT sa.store, - (sa.created_at)::date AS created_date, - sas.crawl_source, - count(*) AS created_count - FROM (public.store_apps sa - LEFT JOIN logging.store_app_sources sas ON (((sa.id = sas.store_app) AND (sa.store = sas.store)))) - WHERE (sa.created_at >= (CURRENT_DATE - '365 days'::interval)) - GROUP BY sa.store, ((sa.created_at)::date), sas.crawl_source - ) - SELECT my_dates.store, - my_dates.date, - created_dates.crawl_source, - created_dates.created_count - FROM (my_dates - LEFT JOIN created_dates ON (((my_dates.date = created_dates.created_date) AND (my_dates.store = created_dates.store)))) - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.store_apps_created_at OWNER TO postgres; - --- --- Name: idx_store_apps_created_at; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX idx_store_apps_created_at ON public.store_apps_created_at USING btree (store, date, crawl_source); - - --- --- Name: idx_store_apps_created_atx; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE UNIQUE INDEX idx_store_apps_created_atx ON public.store_apps_created_at USING btree (store, date, crawl_source); - - --- --- PostgreSQL database dump complete --- - -\unrestrict 8DVuVuAYtUYq1dvTBsxG7ADYZrSCRMaBcfGAhOUQ8oLAIyGO8piqmubmAb9zkFd - 
diff --git a/pg-ddl/schema/public/store_apps_descriptions.sql b/pg-ddl/schema/public/store_apps_descriptions.sql index 626f8766..bc1a08df 100644 --- a/pg-ddl/schema/public/store_apps_descriptions.sql +++ b/pg-ddl/schema/public/store_apps_descriptions.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict FbaVbK09WEtXfTVgKF6uOlP8BgOyou2hZc30wfMGxPQL9QtPdpMi0QyrYxrTNIn +\restrict 0E9aPJ0S3xlmhfqsVG5y9AdXdpEwKCNH2VNZ9qg4cEgl9cUn8diFQrSIgrY8wdm --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -112,5 +112,5 @@ ALTER TABLE ONLY public.store_apps_descriptions -- PostgreSQL database dump complete -- -\unrestrict FbaVbK09WEtXfTVgKF6uOlP8BgOyou2hZc30wfMGxPQL9QtPdpMi0QyrYxrTNIn +\unrestrict 0E9aPJ0S3xlmhfqsVG5y9AdXdpEwKCNH2VNZ9qg4cEgl9cUn8diFQrSIgrY8wdm diff --git a/pg-ddl/schema/public/store_apps_in_latest_rankings__matview.sql b/pg-ddl/schema/public/store_apps_in_latest_rankings__matview.sql index b9fd0e18..dd234e00 100644 --- a/pg-ddl/schema/public/store_apps_in_latest_rankings__matview.sql +++ b/pg-ddl/schema/public/store_apps_in_latest_rankings__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict LLAe02uzMWiI9dbGZWQYhWGl9RmGxkDIJfDTGefNeYWc64F0Jm2U88hWHct052Y +\restrict 3ofGSx0Qgea00aq0wrMJ48v43iTjwdZkna5oX3g2XQ0hdRGEPzJoeFL2NruHQiV --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -36,7 +36,7 @@ CREATE MATERIALIZED VIEW public.store_apps_in_latest_rankings AS sa.installs, sa.rating_count, sa.store_id - FROM 
(frontend.store_apps_z_scores saz + FROM (frontend.z_scores_top_apps saz LEFT JOIN frontend.store_apps_overview sa ON (((saz.store_id)::text = (sa.store_id)::text))) WHERE sa.free ORDER BY COALESCE(saz.installs_z_score_2w, saz.ratings_z_score_2w) DESC @@ -80,5 +80,5 @@ ALTER MATERIALIZED VIEW public.store_apps_in_latest_rankings OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict LLAe02uzMWiI9dbGZWQYhWGl9RmGxkDIJfDTGefNeYWc64F0Jm2U88hWHct052Y +\unrestrict 3ofGSx0Qgea00aq0wrMJ48v43iTjwdZkna5oX3g2XQ0hdRGEPzJoeFL2NruHQiV diff --git a/pg-ddl/schema/public/store_apps_updated_at__matview.sql b/pg-ddl/schema/public/store_apps_updated_at__matview.sql deleted file mode 100644 index a6634954..00000000 --- a/pg-ddl/schema/public/store_apps_updated_at__matview.sql +++ /dev/null @@ -1,75 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict 8nFK1MrqCYLiuWgHbwDPi8Dq3jTzzZ7EnzKLDYWEaMFSY7LmaEEsxGzT0NnAB04 - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; -SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: store_apps_updated_at; Type: MATERIALIZED VIEW; Schema: public; Owner: postgres --- - -CREATE MATERIALIZED VIEW public.store_apps_updated_at AS - WITH my_dates AS ( - SELECT num_series.store, - (generate_series((CURRENT_DATE - '365 days'::interval), (CURRENT_DATE)::timestamp without time zone, '1 day'::interval))::date AS date - FROM generate_series(1, 2, 1) num_series(store) - ), updated_dates AS ( - SELECT store_apps.store, - (store_apps.updated_at)::date AS 
last_updated_date, - count(*) AS last_updated_count - FROM public.store_apps - WHERE (store_apps.updated_at >= (CURRENT_DATE - '365 days'::interval)) - GROUP BY store_apps.store, ((store_apps.updated_at)::date) - ) - SELECT my_dates.store, - my_dates.date, - updated_dates.last_updated_count, - audit_dates.updated_count - FROM ((my_dates - LEFT JOIN updated_dates ON (((my_dates.date = updated_dates.last_updated_date) AND (my_dates.store = updated_dates.store)))) - LEFT JOIN public.audit_dates ON ((my_dates.date = audit_dates.updated_date))) - ORDER BY my_dates.date DESC - WITH NO DATA; - - -ALTER MATERIALIZED VIEW public.store_apps_updated_at OWNER TO postgres; - --- --- Name: idx_my_materialized_view_store_date; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE INDEX idx_my_materialized_view_store_date ON public.store_apps_updated_at USING btree (store, date); - - --- --- Name: idx_store_apps_updated_at; Type: INDEX; Schema: public; Owner: postgres --- - -CREATE UNIQUE INDEX idx_store_apps_updated_at ON public.store_apps_updated_at USING btree (store, date); - - --- --- PostgreSQL database dump complete --- - -\unrestrict 8nFK1MrqCYLiuWgHbwDPi8Dq3jTzzZ7EnzKLDYWEaMFSY7LmaEEsxGzT0NnAB04 - diff --git a/pg-ddl/schema/public/store_categories.sql b/pg-ddl/schema/public/store_categories.sql index f6b23d21..7abd22eb 100644 --- a/pg-ddl/schema/public/store_categories.sql +++ b/pg-ddl/schema/public/store_categories.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict IMASdKTDCWkhoCamaYSWQcxjCeLOwQCxFaiY0RxUtwdPNRbdjJGiidHYDI8ylYS +\restrict Thhbw6MaJ29JbuGTSlnJo88SwGJFNpgSqmMidCWDtlvvKMVrZKLsIz7b5sJ7CG0 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -78,5 +78,5 @@ ALTER TABLE ONLY 
public.store_categories -- PostgreSQL database dump complete -- -\unrestrict IMASdKTDCWkhoCamaYSWQcxjCeLOwQCxFaiY0RxUtwdPNRbdjJGiidHYDI8ylYS +\unrestrict Thhbw6MaJ29JbuGTSlnJo88SwGJFNpgSqmMidCWDtlvvKMVrZKLsIz7b5sJ7CG0 diff --git a/pg-ddl/schema/public/store_collections.sql b/pg-ddl/schema/public/store_collections.sql index bf6456f3..5a5b4733 100644 --- a/pg-ddl/schema/public/store_collections.sql +++ b/pg-ddl/schema/public/store_collections.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict LrNtr970WepmwSrVtIZlKniVGlTlm9wpqp3BtvIyYA4mRgkPb5VEvB2CQlqN08t +\restrict nRYdlW2ZMyUcLNGWcIpRRhV1yvCheRZv1SHd4bUpHmHqd1akGAEU3PU9dQG3VyS --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -78,5 +78,5 @@ ALTER TABLE ONLY public.store_collections -- PostgreSQL database dump complete -- -\unrestrict LrNtr970WepmwSrVtIZlKniVGlTlm9wpqp3BtvIyYA4mRgkPb5VEvB2CQlqN08t +\unrestrict nRYdlW2ZMyUcLNGWcIpRRhV1yvCheRZv1SHd4bUpHmHqd1akGAEU3PU9dQG3VyS diff --git a/pg-ddl/schema/public/stores.sql b/pg-ddl/schema/public/stores.sql index 16151e13..9370cd91 100644 --- a/pg-ddl/schema/public/stores.sql +++ b/pg-ddl/schema/public/stores.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 5xWz6sSwTJcWSZKmUxwwM2HWZpsMDMbJ4grKnh4ytkjYnDIKn3Z8CMOKyCmU86b +\restrict vWwJdRpXhA2fB9AeQL2ME4pcPCKrpPpJu6y9X3zZL9rVPBWd6RhRf1xp25WP0k6 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -93,5 +93,5 @@ ALTER TABLE ONLY public.stores -- PostgreSQL database dump complete -- 
-\unrestrict 5xWz6sSwTJcWSZKmUxwwM2HWZpsMDMbJ4grKnh4ytkjYnDIKn3Z8CMOKyCmU86b +\unrestrict vWwJdRpXhA2fB9AeQL2ME4pcPCKrpPpJu6y9X3zZL9rVPBWd6RhRf1xp25WP0k6 diff --git a/pg-ddl/schema/public/total_count_overview__matview.sql b/pg-ddl/schema/public/total_count_overview__matview.sql index e551ef49..7e0edf18 100644 --- a/pg-ddl/schema/public/total_count_overview__matview.sql +++ b/pg-ddl/schema/public/total_count_overview__matview.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict dDb0wKyzDEugTHDjLFsOjNNoFh4VXUBDndw6b1hsscAecp5OteFtBeVgSQmLxRP +\restrict a4jyjd9TXzilZ8n9c6t5Dm903EuJ1PeLYmhOR9XgZOlckC5P66W7grFc1BVOGUi --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -166,5 +166,5 @@ ALTER MATERIALIZED VIEW public.total_count_overview OWNER TO postgres; -- PostgreSQL database dump complete -- -\unrestrict dDb0wKyzDEugTHDjLFsOjNNoFh4VXUBDndw6b1hsscAecp5OteFtBeVgSQmLxRP +\unrestrict a4jyjd9TXzilZ8n9c6t5Dm903EuJ1PeLYmhOR9XgZOlckC5P66W7grFc1BVOGUi diff --git a/pg-ddl/schema/public/user_requested_scan.sql b/pg-ddl/schema/public/user_requested_scan.sql deleted file mode 100644 index d0c60919..00000000 --- a/pg-ddl/schema/public/user_requested_scan.sql +++ /dev/null @@ -1,81 +0,0 @@ --- --- PostgreSQL database dump --- - -\restrict pR5NU8zWWjwKRMjlscJydPMAmEcyioDNHHBJAfIOEnkaCs95C4yqbboFR99SX8a - --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) - -SET statement_timeout = 0; -SET lock_timeout = 0; -SET idle_in_transaction_session_timeout = 0; -SET transaction_timeout = 0; -SET client_encoding = 'UTF8'; -SET standard_conforming_strings = on; -SELECT pg_catalog.set_config('search_path', '', false); -SET check_function_bodies = false; 
-SET xmloption = content; -SET client_min_messages = warning; -SET row_security = off; - -SET default_tablespace = ''; - -SET default_table_access_method = heap; - --- --- Name: user_requested_scan; Type: TABLE; Schema: public; Owner: postgres --- - -CREATE TABLE public.user_requested_scan ( - id integer NOT NULL, - store_id character varying NOT NULL, - created_at timestamp without time zone DEFAULT CURRENT_TIMESTAMP -); - - -ALTER TABLE public.user_requested_scan OWNER TO postgres; - --- --- Name: user_requested_scan_id_seq; Type: SEQUENCE; Schema: public; Owner: postgres --- - -CREATE SEQUENCE public.user_requested_scan_id_seq - AS integer - START WITH 1 - INCREMENT BY 1 - NO MINVALUE - NO MAXVALUE - CACHE 1; - - -ALTER SEQUENCE public.user_requested_scan_id_seq OWNER TO postgres; - --- --- Name: user_requested_scan_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: postgres --- - -ALTER SEQUENCE public.user_requested_scan_id_seq OWNED BY public.user_requested_scan.id; - - --- --- Name: user_requested_scan id; Type: DEFAULT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_requested_scan ALTER COLUMN id SET DEFAULT nextval('public.user_requested_scan_id_seq'::regclass); - - --- --- Name: user_requested_scan user_requested_scan_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_requested_scan - ADD CONSTRAINT user_requested_scan_pkey PRIMARY KEY (id); - - --- --- PostgreSQL database dump complete --- - -\unrestrict pR5NU8zWWjwKRMjlscJydPMAmEcyioDNHHBJAfIOEnkaCs95C4yqbboFR99SX8a - diff --git a/pg-ddl/schema/public/version_code_api_scan_results.sql b/pg-ddl/schema/public/version_code_api_scan_results.sql index 106ecafb..efc1e585 100644 --- a/pg-ddl/schema/public/version_code_api_scan_results.sql +++ b/pg-ddl/schema/public/version_code_api_scan_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict jVVVNvhkahcUbPmFmq1lldPMPJupwAA9e7Yj1rNXlsG3fo3oaPMQxSbO4p0k6wc +\restrict 
2njHsb7r3NLGtdNWnVjSpv2jWAzTa2EKR7PI0dT6Ieobx7FVnxtyhXbaytNZPjf --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -87,5 +87,5 @@ ALTER TABLE ONLY public.version_code_api_scan_results -- PostgreSQL database dump complete -- -\unrestrict jVVVNvhkahcUbPmFmq1lldPMPJupwAA9e7Yj1rNXlsG3fo3oaPMQxSbO4p0k6wc +\unrestrict 2njHsb7r3NLGtdNWnVjSpv2jWAzTa2EKR7PI0dT6Ieobx7FVnxtyhXbaytNZPjf diff --git a/pg-ddl/schema/public/version_code_sdk_scan_results.sql b/pg-ddl/schema/public/version_code_sdk_scan_results.sql index b7386031..e4db9309 100644 --- a/pg-ddl/schema/public/version_code_sdk_scan_results.sql +++ b/pg-ddl/schema/public/version_code_sdk_scan_results.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict mAfGeixw1WgxWhygz9Dhl9tGWdALHiPZQ8aIu3IqDkXKr1QKH7sjEQK3uKUIe6P +\restrict gK1m6GRtlfDatfXwos8XKSENBtmDAdzgI4oQfRNIF5Uq3JJ8gm8GiqvhuC3vy0Z --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -86,5 +86,5 @@ ALTER TABLE ONLY public.version_code_sdk_scan_results -- PostgreSQL database dump complete -- -\unrestrict mAfGeixw1WgxWhygz9Dhl9tGWdALHiPZQ8aIu3IqDkXKr1QKH7sjEQK3uKUIe6P +\unrestrict gK1m6GRtlfDatfXwos8XKSENBtmDAdzgI4oQfRNIF5Uq3JJ8gm8GiqvhuC3vy0Z diff --git a/pg-ddl/schema/public/version_codes.sql b/pg-ddl/schema/public/version_codes.sql index a210daa8..f0d60a7e 100644 --- a/pg-ddl/schema/public/version_codes.sql +++ b/pg-ddl/schema/public/version_codes.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
PBsKN42hVEYqdrZIJpNsmjKbLsvtC3kx6dHScBxRgHzPXiENmFLUO6uAQST0YzV +\restrict JBU9QUnqSzD5RJfFtoNskH6TkHE3CX2V8rD7KD5XGpNUrWc3lUXt5z7OL4pP2dI --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -89,5 +89,5 @@ ALTER TABLE ONLY public.version_codes -- PostgreSQL database dump complete -- -\unrestrict PBsKN42hVEYqdrZIJpNsmjKbLsvtC3kx6dHScBxRgHzPXiENmFLUO6uAQST0YzV +\unrestrict JBU9QUnqSzD5RJfFtoNskH6TkHE3CX2V8rD7KD5XGpNUrWc3lUXt5z7OL4pP2dI diff --git a/pg-ddl/schema/public/version_details_map.sql b/pg-ddl/schema/public/version_details_map.sql index eba85ffd..700fbd37 100644 --- a/pg-ddl/schema/public/version_details_map.sql +++ b/pg-ddl/schema/public/version_details_map.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict yJMBn9Coo2uM7ZeKQFgYr5EIvAE9rmIlEimeSb4fKVGeREhPbSfVlV1F7TZ7FaK +\restrict QTGi5nxLo3Aa0oXlMe9A0kkrglVSLfCkPF9kLB67Y1sKi3GydKiBtubo4uwKMeL --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -78,5 +78,5 @@ ALTER TABLE ONLY public.version_details_map -- PostgreSQL database dump complete -- -\unrestrict yJMBn9Coo2uM7ZeKQFgYr5EIvAE9rmIlEimeSb4fKVGeREhPbSfVlV1F7TZ7FaK +\unrestrict QTGi5nxLo3Aa0oXlMe9A0kkrglVSLfCkPF9kLB67Y1sKi3GydKiBtubo4uwKMeL diff --git a/pg-ddl/schema/public/version_manifests.sql b/pg-ddl/schema/public/version_manifests.sql index d24c3a24..c1cbb6b2 100644 --- a/pg-ddl/schema/public/version_manifests.sql +++ b/pg-ddl/schema/public/version_manifests.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 
IFZ4VKpqzL7ifg0YEQEKnkKPekLQbcwOUX2qUocfoQ4elH6bdEpEASgJtSz8AfQ +\restrict ddWtpSJfdNPgxYmck8goqKbZspctMZuKfoBHOr0bvgdjyAMeRYXcWaY97MjUdFI --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -78,5 +78,5 @@ ALTER TABLE ONLY public.version_manifests -- PostgreSQL database dump complete -- -\unrestrict IFZ4VKpqzL7ifg0YEQEKnkKPekLQbcwOUX2qUocfoQ4elH6bdEpEASgJtSz8AfQ +\unrestrict ddWtpSJfdNPgxYmck8goqKbZspctMZuKfoBHOr0bvgdjyAMeRYXcWaY97MjUdFI diff --git a/pg-ddl/schema/public/version_strings.sql b/pg-ddl/schema/public/version_strings.sql index eb225fe7..48e3bea1 100644 --- a/pg-ddl/schema/public/version_strings.sql +++ b/pg-ddl/schema/public/version_strings.sql @@ -2,10 +2,10 @@ -- PostgreSQL database dump -- -\restrict 4WHmlfDA71QE421x9bn6jefeWCOdQG8zDnFZfMFmvkXPojHjSFKbphNdXyYWi3s +\restrict YhYbKnM9DyZln11Uf9fUHn0SilKvrXtQIzSOtygIt1GRAcXswOuKL8jA8wXG4O5 --- Dumped from database version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) --- Dumped by pg_dump version 18.0 (Ubuntu 18.0-1.pgdg24.04+3) +-- Dumped from database version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) +-- Dumped by pg_dump version 18.1 (Ubuntu 18.1-1.pgdg24.04+2) SET statement_timeout = 0; SET lock_timeout = 0; @@ -106,5 +106,5 @@ CREATE INDEX version_strings_xml_path_trgm_idx ON public.version_strings USING g -- PostgreSQL database dump complete -- -\unrestrict 4WHmlfDA71QE421x9bn6jefeWCOdQG8zDnFZfMFmvkXPojHjSFKbphNdXyYWi3s +\unrestrict YhYbKnM9DyZln11Uf9fUHn0SilKvrXtQIzSOtygIt1GRAcXswOuKL8jA8wXG4O5 diff --git a/pyproject.toml b/pyproject.toml index 325e8398..87838216 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ [project.optional-dependencies] -dev = ["pre-commit", "pytest","scikit-learn", "spacy" ] +dev = ["pre-commit", 
"pytest","scikit-learn", "spacy", "emoji", "tqdm" ] [build-system]