From 5b5c457e7feafbac8177654100907ca49684e097 Mon Sep 17 00:00:00 2001 From: Omar Date: Mon, 19 Apr 2021 13:49:25 -0600 Subject: [PATCH 1/7] set up workflow for gcmd stats --- .DS_Store | Bin 0 -> 8196 bytes __pycache__/gcmdsupport.cpython-38.pyc | Bin 0 -> 964 bytes cesm_expdb/.DS_Store | Bin 0 -> 12292 bytes cgd_gcmd.txt | 6 ++ cgd_gcmd_counts_levels.txt | 16 +++++ gcmd.py | 80 +++++++++++++++++++++++++ gcmdsupport.py | 36 +++++++++++ 7 files changed, 138 insertions(+) create mode 100644 .DS_Store create mode 100644 __pycache__/gcmdsupport.cpython-38.pyc create mode 100644 cesm_expdb/.DS_Store create mode 100644 cgd_gcmd.txt create mode 100644 cgd_gcmd_counts_levels.txt create mode 100644 gcmd.py create mode 100644 gcmdsupport.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a4b57c3c478ca11c7d419bdfa67116cce584be66 GIT binary patch literal 8196 zcmeHMPiqrF6#q>dv8GfCg2sceH$iD=iblmtk|u|OwrYCt7MpBL3!7|66REeo`$aqm z;x`cVEBIkN)!&;L!tQ3bJy=A@%rf()Z{C}?@Au|U-WGsr%=`*a0K{QkaP@(fJ49`;1F;KI0XI|1bAmlSR(#TMKbgFC*QMjHA^EyjXIP~ zK9serY=)w=bZSVxD?7$4EY5$iGf!r2SLp0&iytnSgumvdd%19G$y@2&;& zBvYPzzI9uBk~8lxO!#$)bizyX78}?_3vIl_7GAKo&wd$a7$Qh|imy~C$2@&aO!_#b zWSPAIeA1D|YQo-Z-s^K@M&!EW_EF(eMgu$8#v$IsY`!Pw8RZ6)s8XiQ=a6)wtxOTJ zcR~vTJ9$tyHr;PYM>RhkcG5(#h`rrzzR=Kjo>Z@#6v|AM=qtHqF$P-5=L2a_8>p zvqMgd7IkPfX>WBm^>#P7-&>4blhMo8_?FV(pROz^0;|UFlH~ub{O|uuijH&JA+Q_> zs6w;dY|vMmTQ7Jmxwgmpj#Y}tMTTk>HaZBAil}QTInp`do%BS?|W~D*_fR40N1@A`I~Pxz)y2HzbrgF zM#yVuC{Qwh?ZT67LM>YQ2-_AV>(H=IkpZg#F>btE^b?3>gnWP*1sNQYFEcq&koM)oz9%2YF43M9VaBP z3414~>-9Zk*@g0$WB=pC*T=nok9(sAt(p3w<5r=9)1k`Ei=4fa4l-iT^e*Z$2k&-b z_0(KzgJ>s=DT`HlsTa|uc5A22luv!-r?Vyynq1fo17C$aUWvOrt&fQ{Uci@@cYWapiive4!nl{#Vt!*x zm5rLVdBS2{J%|R1(=brFocKaAZ42g8?IZ~nDLuI+m{P+vb6`SyEBKcOPlVt?yEsL$ za|S%{2U0tJf|^uUUkwvBN;O>VcEbUS{fKF+OLd9zKwBt|ZyL7UQ0|OswflE6ZL77z9#yhqujzT905dUP=noHosG%l#zbLfuY=daB3N)8%%n zD|pmtKR1o!9W+2ZGH*?kEV4kRh>f^vd6sQC#3dG)CYMKcip=0{1~XzFYZ@itpc7SN cOkgzEIWvbb?LLe+-5;<=<}paq(Iu|+2ROL(n*aa+ literal 0 HcmV?d00001 diff --git a/cesm_expdb/.DS_Store b/cesm_expdb/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..cccf6158b04b1caa80876be7982d725a3bc25eba GIT binary patch literal 12292 zcmeHN-A)rh6h2!pA;AQU#_QggpxDwH|h{&p(w2DMiL^OuXa`HBcag!a# zjgt6(&Y?7d5)1hOGOj0y-qI`;DNhxuhidZ#wM`o+nd5>!B<+ z(jZ_EFbEg~ZW;nS_aTjG^SxVYehmT!fg6ng`-dzt%d#hXt`x2VKfHwI|9J^560Whh z6=hjZmOa^XrD%#aqA8@?)bc_M%WaPGELVptd$Q+Bw>epEbFw_m9Jp|@gm%a*Zjh|9 z5a!1qU=Zkoz#Wu5)*{0##H<|m782s!4x-{!a_Eq{bOdR~N4z6Hq3bN-#kC%hvfdh= zxS620i0HTpovZW`d_#`5Xa(0CiN{9VBd<06^PzsbPduf$1PgZ1@`z4gM;CQZ2eo8$ zUbCbWvg8?9g?b_q`xt>^*xG7Z+aF3590|AScLRLsd<2)=#>hZBknmr}3 zEj}l+MiP^4@Js8!+-GAXE7MkZnKh4@$$>?2`I{W%9KDNwORO)N6*K3u!e@t8JbPl_ z(agl9XrBcJ{Wz(Qn)W9b)b|dx1Ut5gw{sWYO&4D*dyP8YavsM7o-FW|4%4P1d_yU0 zVowsVX<-OBxtJy5!(KID%kofcIY>WZ?AGbwg>ZdS7TAcw=ewO)mT)D@J8$lRg0!E$NBTY#@U$p^FTjV-iH4_g#XFm*P-X| zKJosBeW0op6QfB60fT@+z#w1{xMm34MSmi8oP+z-SpN~_l{HjED|;`r^BhsL?NkTY z(Z2Bg?{pkT3%l>TBEEhrBJCX3hc@mTD34u7a5NjhYetK%)9GieZLD1e%-JqYhH9bhrV*O-oomR86_tkM z^$bV2$2ocrd%8G>!CXr_!5%)>nXCG!r_!CvH?#<-* zGB;PFSIF@rE&ZAt@z_xo%&|Jk{ZDJn;%grCv=;Nn+~=gzTI|`>{_B25WlDVv(tdFg zjbnJ24s2n*H5L)!_k4T8XVE0v7-l5o8%=ZzeL|Yz{r{)Y=l?_{^J5S&2wYDDtkG({ zT0!5uz4cHpd9OW-{05m5*X!{G60XHoJfzr)hZI}!kOjAfs8jap_FRd7kuLoZK-9@y fISZ-#Xs-X2-cmcgYvzA#WpCzx|GfBrG5`Mowap0U literal 0 HcmV?d00001 diff --git a/cgd_gcmd.txt b/cgd_gcmd.txt new file mode 100644 index 0000000..207a25a --- /dev/null +++ b/cgd_gcmd.txt @@ -0,0 +1,6 @@ +Earth Science > Climate Indicators > Atmospheric/ Ocean Indicators > Teleconnections > North Atlantic Oscillation > NAO +CESM > NCAR Community Earth System Model +EARTH SCIENCE > CLIMATE INDICATORS > ATMOSPHERIC/OCEAN INDICATORS > TELECONNECTIONS > NORTH PACIFIC PATTERN > NP +EARTH SCIENCE > CLIMATE INDICATORS > ATMOSPHERIC/OCEAN INDICATORS > TELECONNECTIONS > ARCTIC OSCILLATION > AO +no keywords +Earth Science > Climate Indicators > Atmospheric/ Ocean Indicators > Teleconnections > North Atlantic Oscillation > NAO diff --git a/cgd_gcmd_counts_levels.txt b/cgd_gcmd_counts_levels.txt new file mode 100644 index 0000000..5b1ec27 --- /dev/null +++ b/cgd_gcmd_counts_levels.txt @@ -0,0 +1,16 @@ +earth science > climate indicators > atmospheric/ ocean indicators > teleconnections > north atlantic oscillation > nao Count: 2 Level 6 + +cesm > ncar community earth system model Count: 1 Level 2 + +earth science > climate indicators > atmospheric/ocean indicators > teleconnections > north pacific pattern > np Count: 1 Level 6 + +earth science > climate indicators > atmospheric/ocean indicators > teleconnections > arctic oscillation > ao Count: 1 Level 6 + +no keywords Count: 1 Level 1 + +Level 1: 1 +Level 2: 1 +Level 3: 0 +Level 4: 0 +Level 5: 0 +Level 6: 3 diff --git a/gcmd.py b/gcmd.py new file mode 100644 index 0000000..4c24a20 --- /dev/null +++ b/gcmd.py @@ -0,0 +1,80 @@ +import glob +import sys +import xml.etree.ElementTree as ET +import os +from xml.etree.ElementTree import parse +import time +import xmltodict +import pprint +from gcmdsupport import getGCMDfromXML +import os +from os import path +from collections import Counter + + +path1 = '.' +if(path.exists("cgd_gcmd.txt")): + os.remove("cgd_gcmd.txt") +fo = open("cgd_gcmd.txt", "w+") + +for filename in os.listdir(path1): + if not filename.endswith('.xml'): continue + fullname = os.path.join(path1, filename) + keywords = getGCMDfromXML(fullname) + for keyword in keywords: + fo.writelines(keyword) + fo.writelines("\n") + +fo.close() + +level1 = 0 +level2 = 0 +level3 = 0 +level4 = 0 +level5 = 0 +level6 = 0 + +# with open('cgd_gcmd.txt') as f: +# seen = set() +# for line in f: +# line_lower = line.lower() +# if line_lower in seen: +# print(line) +# else: +# seen.add(line_lower) + +if(path.exists("cgd_gcmd_counts_levels.txt")): + os.remove("cgd_gcmd_counts_levels.txt") + +sys.stdout = open("cgd_gcmd_counts_levels.txt", "w") + +with open('cgd_gcmd.txt') as f: + c=Counter(c.strip().lower() for c in f if c.strip()) #for case-insensitive search + for line in c: + if (c[line]>=1): + level = line.count('>') + 1 + if(level == 1): + level1 += 1 + elif(level == 2): + level2 += 1 + elif(level == 3): + level3 += 1 + elif(level == 4): + level4 += 1 + elif(level == 5): + level5 += 1 + elif(level == 6): + level6 += 1 + count = "Count: " + str(c[line]) + level = "Level " + str(level) + fileline = ''.join((line, ' ', count, ' ', level, '\n')) + print(fileline) + +print("Level 1:", level1) +print("Level 2:", level2) +print("Level 3:", level3) +print("Level 4:", level4) +print("Level 5:", level5) +print("Level 6:", level6) + +sys.stdout.close() \ No newline at end of file diff --git a/gcmdsupport.py b/gcmdsupport.py new file mode 100644 index 0000000..05186aa --- /dev/null +++ b/gcmdsupport.py @@ -0,0 +1,36 @@ +import xmltodict +import pprint + +def getGCMDfromXML(filename): + with open(filename) as fd: + doc = xmltodict.parse(fd.read()) + + pp = pprint.PrettyPrinter(indent=1) + + #print(doc['gmd:MD_Metadata']['gmd:identificationInfo']['gmd:MD_DataIdentification']['gmd:descriptiveKeywords']) + try: + gcmd_ref = doc['gmd:MD_Metadata']['gmd:identificationInfo']['gmd:MD_DataIdentification']['gmd:descriptiveKeywords'][1]['gmd:MD_Keywords']['gmd:keyword'] + except IndexError: + pass + except KeyError: + gcmd_ref = doc['gmd:MD_Metadata']['gmd:identificationInfo']['gmd:MD_DataIdentification']['gmd:descriptiveKeywords'] + + is_local = "gcmd_ref" in locals() + + keywords = [] + + if is_local: + for elem in gcmd_ref: + try: + keywords.append(elem['gco:CharacterString']) + except TypeError: + try: + keywords.append(gcmd_ref['gco:CharacterString']) + except KeyError: + keywords.append("no keywords") + except KeyError: + keywords.append("no keywords") + else: + keywords.append("no keywords") + + return keywords \ No newline at end of file From 5496a8cf367f443b31d7ba7e7cdcff0cfb441c09 Mon Sep 17 00:00:00 2001 From: Omar Date: Mon, 19 Apr 2021 13:55:34 -0600 Subject: [PATCH 2/7] added yml script for workflow --- .github/workflows/gcmd.yml | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/gcmd.yml diff --git a/.github/workflows/gcmd.yml b/.github/workflows/gcmd.yml new file mode 100644 index 0000000..7120959 --- /dev/null +++ b/.github/workflows/gcmd.yml @@ -0,0 +1,45 @@ +name: Get GCMD Stats + +on: [push] + +jobs: + run: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install dependencies + run: | + git config --local user.email "action@github.com" + git config --local user.name "github-actions" + git pull + python -m pip install --upgrade pip + pip install pandas + pip install matplotlib + pip install xmltodict + echo "done" + - name: Run GCMD Count script + run: | + python3 gcmd.py + - name: Commit files + id: commit + run: | + git config --local user.email "action@github.com" + git config --local user.name "github-actions" + git add --all + if [ -z "$(git status --porcelain)" ]; then + echo "::set-output name=push::false" + else + git commit -m "Add changes" -a + echo "::set-output name=push::true" + fi + shell: bash + - name: Push changes + if: steps.commit.outputs.push == 'true' + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} From 270db87ea17b87363f0107d9531fac8fea42eb11 Mon Sep 17 00:00:00 2001 From: Omar Date: Mon, 19 Apr 2021 18:46:54 -0600 Subject: [PATCH 3/7] linked counts in readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a3ef3eb..4366891 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,10 @@ https://service.ncddc.noaa.gov/rdn/www/metadata-standards/documents/MD-Metadata. ## CGD Completeness Graph +![GCMD Keywords](https://github.com/NCAR/dash-cgd-prod/blob/5b5c457e7feafbac8177654100907ca49684e097/cgd_gcmd.txt) + +![GCMD Level Counts](https://github.com/NCAR/dash-cgd-prod/blob/5b5c457e7feafbac8177654100907ca49684e097/cgd_gcmd.txt#L11) + ![CGD Completeness Graph](https://raw.githubusercontent.com/NCAR/dash-eol-prod/master/actions/CGD/barcharts/cgd.png) ## Publisher Information From bd94856712066246de5df5cf3fcc3bc45c3e9110 Mon Sep 17 00:00:00 2001 From: Omar Date: Mon, 19 Apr 2021 19:00:40 -0600 Subject: [PATCH 4/7] added pull --- .github/workflows/gcmd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gcmd.yml b/.github/workflows/gcmd.yml index 7120959..4fae96b 100644 --- a/.github/workflows/gcmd.yml +++ b/.github/workflows/gcmd.yml @@ -30,6 +30,7 @@ jobs: run: | git config --local user.email "action@github.com" git config --local user.name "github-actions" + git pull git add --all if [ -z "$(git status --porcelain)" ]; then echo "::set-output name=push::false" From b7718cca129654b22b95bceaded3e7db367be865 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 20 Apr 2021 01:07:25 +0000 Subject: [PATCH 5/7] Add changes --- __pycache__/gcmdsupport.cpython-39.pyc | Bin 0 -> 986 bytes cgd_gcmd.txt | 6 +++--- cgd_gcmd_counts_levels.txt | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) create mode 100644 __pycache__/gcmdsupport.cpython-39.pyc diff --git a/__pycache__/gcmdsupport.cpython-39.pyc b/__pycache__/gcmdsupport.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..08c58ddd0166a52f7787b3cedb8350b8d03ce655 GIT binary patch literal 986 zcmZWn-A)uS6mGlS+1Ve^AOY7H;sr6;1lTn(ULY|>))=BoNH9S*F*uoamyw-z=oDlp zGv45>kq6KOE`0$Wrng@43SN1-vmi>F@0>Y3-}%m&rryLv1;KjS`267qMd+6et}gd!D{-k$e`O-oE4J!6uQehmkQz`#HHc)Jib|<(QPD13$e0xkI^B@ihYzhY4HTB$0=>2_eoYtrc&p$ zoR(On4ijUvh4#-uwQ_=oQ@QS#LH{zuob3yY+PE_)u&S-j`)%hMma%k`v47rzyr|f| zc~$DbJ=fMR>=ie#aRR}((8;ZS$d)_poetL?^R$=D*n`04f?nWzI*?*b^kh;W6Wc&q z`zM{u+7#oluYyPihx|1kev*pCSK?O(RmBm)z<5a(Q*vQ=YH6+SDo# z{iG`7gE2L3R1K987p8m^4zy%}uT3%XRLl)k++)UxA}G>kVncDQhZ{EF%2d|iFOOa- zC6#euiu}tNNZ%X8#_=N1WTyOf81Yf6;TpFW47l(@Zb*-r0+YTWAP(;vb=@G|8Pyv1 zaxz2tfQLVkngPP^mFBJtc~cz-!BrD3VXw)&cz4n7v&BeB_FrT#AU=p9sq|7bG?RU< zU#)i9Jtf0;oj0~++yaAe1uu|Vf#7*u!xV6jR0t&wb}_+Icyer~;cFT)>MqnY3d#Ye e%GTI}(S+B`ESwp4C6w%7z@OP?5!;R~cF7+s@%QTh literal 0 HcmV?d00001 diff --git a/cgd_gcmd.txt b/cgd_gcmd.txt index 207a25a..022932a 100644 --- a/cgd_gcmd.txt +++ b/cgd_gcmd.txt @@ -1,6 +1,6 @@ -Earth Science > Climate Indicators > Atmospheric/ Ocean Indicators > Teleconnections > North Atlantic Oscillation > NAO +no keywords +EARTH SCIENCE > CLIMATE INDICATORS > ATMOSPHERIC/OCEAN INDICATORS > TELECONNECTIONS > ARCTIC OSCILLATION > AO CESM > NCAR Community Earth System Model +Earth Science > Climate Indicators > Atmospheric/ Ocean Indicators > Teleconnections > North Atlantic Oscillation > NAO EARTH SCIENCE > CLIMATE INDICATORS > ATMOSPHERIC/OCEAN INDICATORS > TELECONNECTIONS > NORTH PACIFIC PATTERN > NP -EARTH SCIENCE > CLIMATE INDICATORS > ATMOSPHERIC/OCEAN INDICATORS > TELECONNECTIONS > ARCTIC OSCILLATION > AO -no keywords Earth Science > Climate Indicators > Atmospheric/ Ocean Indicators > Teleconnections > North Atlantic Oscillation > NAO diff --git a/cgd_gcmd_counts_levels.txt b/cgd_gcmd_counts_levels.txt index 5b1ec27..3ee347f 100644 --- a/cgd_gcmd_counts_levels.txt +++ b/cgd_gcmd_counts_levels.txt @@ -1,12 +1,12 @@ -earth science > climate indicators > atmospheric/ ocean indicators > teleconnections > north atlantic oscillation > nao Count: 2 Level 6 +no keywords Count: 1 Level 1 -cesm > ncar community earth system model Count: 1 Level 2 +earth science > climate indicators > atmospheric/ocean indicators > teleconnections > arctic oscillation > ao Count: 1 Level 6 -earth science > climate indicators > atmospheric/ocean indicators > teleconnections > north pacific pattern > np Count: 1 Level 6 +cesm > ncar community earth system model Count: 1 Level 2 -earth science > climate indicators > atmospheric/ocean indicators > teleconnections > arctic oscillation > ao Count: 1 Level 6 +earth science > climate indicators > atmospheric/ ocean indicators > teleconnections > north atlantic oscillation > nao Count: 2 Level 6 -no keywords Count: 1 Level 1 +earth science > climate indicators > atmospheric/ocean indicators > teleconnections > north pacific pattern > np Count: 1 Level 6 Level 1: 1 Level 2: 1 From 5f4a62f79f758eec161544051595237e6a8d71ec Mon Sep 17 00:00:00 2001 From: Omar Kaheel Date: Mon, 19 Apr 2021 19:08:37 -0600 Subject: [PATCH 6/7] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4366891..ce6e032 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ https://service.ncddc.noaa.gov/rdn/www/metadata-standards/documents/MD-Metadata. ![GCMD Keywords](https://github.com/NCAR/dash-cgd-prod/blob/5b5c457e7feafbac8177654100907ca49684e097/cgd_gcmd.txt) -![GCMD Level Counts](https://github.com/NCAR/dash-cgd-prod/blob/5b5c457e7feafbac8177654100907ca49684e097/cgd_gcmd.txt#L11) +![GCMD Level Counts](https://github.com/okaheel/dash-cgd-prod/blob/master/cgd_gcmd_counts_levels.txt#L11) ![CGD Completeness Graph](https://raw.githubusercontent.com/NCAR/dash-eol-prod/master/actions/CGD/barcharts/cgd.png) From a2ff4bfa2c35e9effafa1fc26352ab26deed1c03 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 20 Apr 2021 01:09:07 +0000 Subject: [PATCH 7/7] Add changes --- __pycache__/gcmdsupport.cpython-39.pyc | Bin 986 -> 986 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/__pycache__/gcmdsupport.cpython-39.pyc b/__pycache__/gcmdsupport.cpython-39.pyc index 08c58ddd0166a52f7787b3cedb8350b8d03ce655..e63b38db5687106f05f3b19e9391e80bde7b11f7 100644 GIT binary patch delta 18 Ycmcb`ev6$mk(ZZ?0SFdu