wise009
07/11/2022, 11:15 AMwise009
07/11/2022, 11:15 AMyaml
word_cloud_en:
  type: pandas.CSVDataSet
  filepath: data/05_model_input/word_cloud_en.csv
python
def create_word_cloud(df: pd.DataFrame, params: Dict[str, str]) -> pd.DataFrame:
    """Build a per-member token frequency table used as word-cloud input.

    The title and overview text of each row are concatenated, cleaned with the
    language-specific cleaner, exploded to one row per token and counted per
    ``(token, member)`` pair.

    Args:
        df: Source frame containing the title, overview and member-id columns
            named in ``params``.
        params: Mapping with the keys ``"title"``, ``"overview"`` and
            ``"member_id"`` (column names in ``df``) and ``"lang"``
            (``"en"`` or ``"jp"``).

    Returns:
        A frame with the columns ``content``, ``memberID`` and ``freq``,
        de-duplicated, sorted by ``freq`` in descending order and re-indexed
        from zero.

    Raises:
        KeyError: If a required key is missing from ``params`` or a named
            column is missing from ``df``.
    """
    title_col = params["title"]
    overview_col = params["overview"]
    mem_col = params["member_id"]
    lang = params["lang"]

    # Use the non in-place ``dropna`` so we work on a fresh frame instead of
    # mutating a column slice of the caller's frame (which triggers
    # SettingWithCopyWarning). Rows where BOTH text columns are missing go.
    docs = df[[title_col, overview_col, mem_col]].dropna(
        subset=[title_col, overview_col], how="all"
    )

    # ``pd.isna`` covers None, NaN and pd.NA; the ``x != x`` trick raises a
    # TypeError when the value is pd.NA.
    content = [
        ("" if pd.isna(title) else title)
        + ("" if pd.isna(overview) else overview)
        for title, overview in zip(docs[title_col], docs[overview_col])
    ]
    words = pd.DataFrame({"content": content, "memberID": list(docs[mem_col])})

    # Only the first 200 documents are processed. ``copy`` detaches the result
    # from ``head`` so the column assignment below is not a chained assignment.
    words = words.head(200).copy()

    # NOTE(review): any other ``lang`` value leaves the text uncleaned, exactly
    # as before - confirm whether that should be an error instead.
    if lang == "en":
        words["content"] = words["content"].apply(en_clean)
    elif lang == "jp":
        words["content"] = words["content"].apply(jp_clean)

    # Presumably the cleaners return a list of tokens per document, which
    # ``explode`` turns into one row per token - verify against the helpers.
    words = words.explode("content")
    words["freq"] = words.groupby(["content", "memberID"])["content"].transform(
        "count"
    )

    words = (
        words.sort_values(by="freq", ascending=False)
        .drop_duplicates()
        .dropna()
    )

    # Presumably ``no_num`` yields a missing value for tokens that should be
    # discarded (hence the second ``dropna``) - verify against the helper.
    words = words.assign(content=words["content"].apply(no_num))
    words = (
        words.dropna()
        .sort_values(by="freq", ascending=False)
        .reset_index(drop=True)
    )

    print(words.head())
    return words
07/11/2022, 11:24 AMpython
File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/kedro/io/core.py", line 217, in save
    raise DataSetError(message) from exc
kedro.io.core.DataSetError: Failed while saving data to data set CSVDataSet(filepath=/Users/alifian/Documents/GitHub/kobe-u-ml-maintenance/data/05_model_input/word_cloud_en.csv, load_args={}, protocol=file, save_args={'index': False}).
a bytes-like object is required, not 'str'
noklam
07/11/2022, 1:54 PMnoklam
07/11/2022, 1:56 PM
df.to_csv(something)
return df
wise009
07/11/2022, 7:47 PMwise009
07/11/2022, 7:48 PMnoklam
07/11/2022, 7:51 PMwise009
07/11/2022, 8:12 PMnoklam
07/11/2022, 8:14 PMwise009
07/11/2022, 8:26 PMwise009
07/11/2022, 8:28 PMpython
Traceback (most recent call last):
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/kedro/io/core.py", line 210, in save
    self._save(data)
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/kedro/extras/datasets/pandas/csv_dataset.py", line 171, in _save
    data.to_csv(path_or_buf=buf, **self._save_args)
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/pandas/core/generic.py", line 3167, in to_csv
    formatter.save()
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 206, in save
    self._save()
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 314, in _save
    self._save_header()
  File "/Users/alifian/opt/miniconda3/envs/kedro-kobe/lib/python3.8/site-packages/pandas/io/formats/csvs.py", line 283, in _save_header
    writer.writerow(encoded_labels)
TypeError: a bytes-like object is required, not 'str'
The above exception was the direct cause of the following exception: