github
diff --git a/Diff for: ‎.dockerignore
+7 b/Diff for: ‎.dockerignore
+7
diff --git a/Diff for: ‎.github/workflows/test.yaml
+76 b/Diff for: ‎.github/workflows/test.yaml
+76
diff --git a/Diff for: ‎.gitignore
+27 b/Diff for: ‎.gitignore
+27
diff --git a/Diff for: ‎BENCHMARK.md
+80 b/Diff for: ‎BENCHMARK.md
+80
diff --git a/Diff for: ‎CODE_OF_CONDUCT.md
+76 b/Diff for: ‎CODE_OF_CONDUCT.md
+76
diff --git a/Diff for: ‎CONTRIBUTING.md
+47 b/Diff for: ‎CONTRIBUTING.md
+47
diff --git a/Diff for: ‎LICENSE
+21 b/Diff for: ‎LICENSE
+21
@@ -0,0 +1,7 @@
+*.csv
+*.pkl
+*.hdf5
+resources/
+!resources/README.md
+!tests/data/
+
@@ -0,0 +1,76 @@
+name: Smoke Test
+on: push
+
+# build the image once, then run each group of smoke tests as its own job so they run in parallel, even if a little redundant
+jobs:
+  # Builds the CPU test image from docker/docker-cpu.Dockerfile and pushes it
+  # to the registry as github/csnet-smoketest, the image the test jobs run.
+  docker_build:
+    name: Build Test Container
+    runs-on: ubuntu-latest
+    steps:
+
+    # Pinned to a release tag instead of a moving branch so that upstream
+    # changes to the action cannot silently change this workflow.
+    - name: Copy Repo Files
+      uses: actions/checkout@v4
+
+    # Credentials come from repository secrets via the env block below and are
+    # quoted so that whitespace or glob characters in them survive the shell.
+    # The pull only warms the layer cache for --cache-from, so it is allowed to
+    # fail (e.g. the image has never been pushed) without failing the build.
+    - name: docker build
+      run: |
+        echo "${INPUT_PASSWORD}" | docker login -u "${INPUT_USERNAME}" --password-stdin
+        cd "$GITHUB_WORKSPACE"
+        docker pull github/csnet-smoketest || true
+        docker build --cache-from github/csnet-smoketest -t github/csnet-smoketest -f docker/docker-cpu.Dockerfile .
+        docker push github/csnet-smoketest
+      env:
+        INPUT_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+        INPUT_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+   
+  # Type-checks the training code and runs two one-epoch dry runs inside the
+  # github/csnet-smoketest image. Starts only after docker_build has finished.
+  basic_tests:
+    needs: docker_build
+    name: Integration Test Default Parameters
+    runs-on: ubuntu-latest
+    
+    steps:
+    # Static type check of train.py and model_test.py; imports that cannot be
+    # resolved are ignored and imported modules are not followed.
+    - name: mypy type checking
+      run: |
+        cd $GITHUB_WORKSPACE
+        docker run github/csnet-smoketest mypy --ignore-missing-imports --follow-imports skip /src/train.py /src/model_test.py
+      
+    # Dry run of the neuralbow model, limited to one epoch. The same test file
+    # is passed for all three data-path arguments (presumably the train, valid
+    # and test splits -- confirm against train.py).
+    - name: neuralbow, all languages
+      run: |
+        cd $GITHUB_WORKSPACE
+        docker run github/csnet-smoketest python train.py /src /tests/data/data_train.txt /tests/data/data_train.txt /tests/data/data_train.txt --dryrun --max-num-epochs 1 --model neuralbow
+      
+    # Same dry run without an explicit --model, exercising --max-files-per-dir.
+    - name: --max-files-per-dir 2
+      run: |
+        cd $GITHUB_WORKSPACE 
+        docker run github/csnet-smoketest python train.py /src /tests/data/data_train.txt /tests/data/data_train.txt /tests/data/data_train.txt --dryrun --max-num-epochs 1 --max-files-per-dir 2
+  
+  # One-epoch dry run of the 1dcnn model inside the github/csnet-smoketest
+  # image. Starts only after docker_build has finished.
+  CNN:
+    needs: docker_build
+    name: 1DCNN
+    runs-on: ubuntu-latest
+    
+    steps:
+    # The same test file is passed for all three data-path arguments.
+    - name: 1dcnn, all languages
+      run: |
+        cd $GITHUB_WORKSPACE
+        docker run github/csnet-smoketest python train.py /src /tests/data/data_train.txt /tests/data/data_train.txt /tests/data/data_train.txt --dryrun --max-num-epochs 1 --model 1dcnn
+
+  # One-epoch dry run of the self-attention model inside the
+  # github/csnet-smoketest image. Starts only after docker_build has finished.
+  # NOTE(review): the job id is `selfattn` but the --model value is `selfatt`;
+  # confirm that `selfatt` is the name train.py expects.
+  selfattn:
+    needs: docker_build
+    name: selfattn
+    runs-on: ubuntu-latest
+    steps:
+
+    # --hypers-override passes a JSON object that sets batch_size to 64 for
+    # this run; the quotes are backslash-escaped for the shell.
+    - name: selfattn, all languages
+      run: |
+        cd $GITHUB_WORKSPACE
+        docker run github/csnet-smoketest python train.py /src /tests/data/data_train.txt /tests/data/data_train.txt /tests/data/data_train.txt --dryrun --max-num-epochs 1 --model selfatt --hypers-override "{\"batch_size\":64}"
+
+  # One-epoch dry run of the rnn model inside the github/csnet-smoketest
+  # image. Starts only after docker_build has finished.
+  rnn:
+    needs: docker_build
+    name: rnn
+    runs-on: ubuntu-latest
+    steps:
+    # The same test file is passed for all three data-path arguments.
+    - name: rnn, all languages
+      run: |
+        cd $GITHUB_WORKSPACE
+        docker run github/csnet-smoketest python train.py /src /tests/data/data_train.txt /tests/data/data_train.txt /tests/data/data_train.txt --dryrun --max-num-epochs 1 --model rnn
@@ -0,0 +1,27 @@
+# ts
+**/node_modules/
+/webroot/scripts/*.js
+
+# vim
+**/*.swp
+
+# python
+**/*.pyc
+**/__pycache__/
+
+# jupyter
+**/.ipynb_checkpoints/
+
+# data
+resources/
+!resources/README.md
+!tests/data/
+*.csv
+
+# environment
+*.ftpconfig
+
+.idea
+/src/wandb/run-*
+/src/wandb/debug.log
+*.html
@@ -0,0 +1,80 @@
+## Submitting runs to the benchmark
+
+The Weights & Biases (W&B) benchmark  tracks and compares models trained on the CodeSearchNet dataset by the global machine learning research community. Anyone is welcome to submit their results for review.
+
+## Submission process
+
+### Requirements
+
+There are a few requirements for submitting a model to the benchmark.
+- You must have a run logged to [W&B](https://app.wandb.ai)
+- Your run must have attached inference results in a file named  `model_predictions.csv`. You can view all the files attached to a given run in the browser by clicking the "Files" icon from that run's main page. 
+- The schema outlined in the submission format section below must be strictly followed. 
+
+### Submission format
+
+A valid submission to the CodeSearchNet Challenge requires a file named **model_predictions.csv** with the following fields: `query`, `language`, `identifier`, and `url`:
+
+* `query`: the textual representation of the query, e.g. "int to string" .  
+* `language`: the programming language for the given query, e.g. "python".  This information is available as a field in the data to be scored.
+* `identifier`: this is an optional field that can help you track your data
+* `url`: the unique GitHub URL to the returned results, e.g. "https://github.com/JamesClonk/vultr/blob/fed59ad207c9bda0a5dfe4d18de53ccbb3d80c91/cmd/commands.go#L12-L190" . This information is available as a field in the data to be scored.
+      
+For further background and instructions on the submission process, see the root README.
+
+The row order corresponds to the result ranking in the search task. For example, if in row 5 there is an entry for the Python query "read properties file", and in row 60 another result for the Python query "read properties file", then the URL in row 5 is considered to be ranked higher than the URL in row 60 for that query and language.
+
+The script we used to create the baseline submission is [src/predict.py](src/predict.py).  You are not required to use this script to produce your submission file -- we only provide it for reference.
+
+Here is an example: 
+
+| query                 | language | identifier                        | url                                                                                                                                                   |
+| --------------------- | -------- | --------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- |
+| convert int to string | python   | int_to_decimal_str                | https://github.com/raphaelm/python-sepaxml/blob/187b699b1673c862002b2bae7e1bd62fe8623aec/sepaxml/utils.py#L64-L76                                     |
+| convert int to string | python   | str_to_int_array                  | https://github.com/UCSBarchlab/PyRTL/blob/0988e5c9c10ededd5e1f58d5306603f9edf4b3e2/pyrtl/rtllib/libutils.py#L23-L33                                   |
+| convert int to string | python   | Bcp47LanguageParser.IntStr26ToInt | https://github.com/google/transitfeed/blob/eb2991a3747ba541b2cb66502b305b6304a1f85f/extensions/googletransit/pybcp47/bcp47languageparser.py#L138-L139 |
+| convert int to string | python   | PrimaryEqualProof.to_str_dict     | https://github.com/hyperledger-archives/indy-anoncreds/blob/9d9cda3d505c312257d99a13d74d8f05dac3091a/anoncreds/protocol/types.py#L604-L613            |
+| convert int to string | python   | to_int                            | https://github.com/mfussenegger/cr8/blob/a37d6049f1f9fee2d0556efae2b7b7f8761bffe8/cr8/cli.py#L8-L23                                                   |
+| how to read .csv file in an efficient way? | ruby | Icosmith.Font.generate_scss                | https://github.com/tulios/icosmith-rails/blob/e73c11eaa593fcb6f9ba93d34fbdbfe131693af4/lib/icosmith-rails/font.rb#L80-L88             |
+| how to read .csv file in an efficient way? | ruby | WebSocket.Extensions.valid_frame_rsv       | https://github.com/faye/websocket-extensions-ruby/blob/1a441fac807e08597ec4b315d4022aea716f3efc/lib/websocket/extensions.rb#L120-L134 |
+| how to read .csv file in an efficient way? | ruby | APNS.Pem.read_file_at_path                 | https://github.com/jrbeck/mercurius/blob/1580a4af841a6f30ac62f87739fdff87e9608682/lib/mercurius/apns/pem.rb#L12-L18                   |
+
+
+
+### Submitting model predictions to W&B 
+
+You can submit your results to the benchmark as follows:
+
+1. Run a training job with any script (your own or the baseline example provided, with or without W&B logging).
+2. Generate your own file of model predictions following the format above and name it `model_predictions.csv`.
+3. Upload a run to wandb with this `model_predictions.csv` file attached.
+
+Our example script [src/predict.py](src/predict.py) takes care of steps 2 and 3 for a model whose training run has been logged to W&B, given the corresponding W&B run id, which you can find on the /overview page in the browser or by clicking the 'info' icon on a given run.
+
+Here is a short example script that will create a run in W&B and perform the upload (step 3) for a local file of predictions:
+```python
+import wandb
+wandb.init(project="codesearchnet", resume="must")
+wandb.save('model_predictions.csv')
+```
+
+### Publishing your submission
+
+You've now generated all the content required to submit a run to the CodeSearchNet benchmark. Using the W&B GitHub integration you can now submit your model for review via the web app.
+
+You can submit your runs by visiting the run page and clicking on the overview tab:
+![](https://github.com/wandb/core/blob/master/frontends/app/src/assets/run-page-benchmark.png?raw=true)
+
+or by selecting a run from the runs table:
+![](https://app.wandb.ai/static/media/submit_benchmark_run.e286da0d.png)
+
+### Result evaluation
+
+Once you upload your `model_predictions.csv` file, W&B will compute the normalized cumulative gain (NCG) of your model's predictions against the human-annotated relevance scores.  Further details on the evaluation process and metrics are in the root README. For transparency, we include the script used to evaluate submissions: [src/relevanceeval.py](src/relevanceeval.py)
+
+
+### Training the baseline model (optional)
+
+Replicating our results for the CodeSearchNet baseline is optional, as we encourage the community to create their own models and methods for ranking search results.  To replicate our baseline submission, you can start with the instructions in the [CodeSearchNet GitHub repository](https://github.com/ml-msr-github/CodeSearchNet).  This baseline model uses [src/predict.py](src/predict.py) to generate the submission file.
+
+Your run will be logged to W&B, within a project that will be automatically linked to this benchmark.
@@ -0,0 +1,76 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at opensource@github.com. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
@@ -0,0 +1,47 @@
+## Contributing
+
+[fork]: https://help.github.com/articles/fork-a-repo/
+[pr]: https://help.github.com/articles/creating-a-pull-request/
+[style]: https://www.python.org/dev/peps/pep-0008/
+[code-of-conduct]: CODE_OF_CONDUCT.md
+[azurepipelines]: azure-pipelines.yml
+[benchmark]: BENCHMARK.md
+
+Hi there! We're thrilled that you'd like to contribute to this project. Your help is essential for keeping it great.
+
+Contributions to this project are [released](https://help.github.com/articles/github-terms-of-service/#6-contributions-under-repository-license) to the public under the [project's open source license](LICENSE).
+
+Please note that this project is released with a [Contributor Code of Conduct][code-of-conduct]. By participating in this project you agree to abide by its terms.
+
+## Scope
+
+We anticipate that the community will design custom architectures and use frameworks other than Tensorflow.  Furthermore, we anticipate that other datasets beyond the ones provided in this project might be useful.  It is not our intention to integrate the best models and datasets into this repository as a superset of all available ideas.  Rather, we intend to provide baseline approaches and a central place of reference with links to related repositories from the community.  Therefore, we are accepting pull requests for the following items:
+
+- Bug fixes
+- Updates to documentation, including links to your project(s) where improvements to the baseline have been made
+- Minor improvements to the code
+
+Please open an issue if you are unsure regarding the best course of action.  
+
+## Submitting a pull request
+
+0. [Fork][fork] and clone the repository
+0. Configure and install the dependencies: `script/bootstrap`
+0. Make sure the tests pass on your machine: see [azure-pipelines.yml][azurepipelines] to see tests we are currently running.
+0. Create a new branch: `git checkout -b my-branch-name`
+0. Make your change, add tests, and make sure the tests still pass.
+0. Push to your fork and [submit a pull request][pr]
+0. Pat yourself on the back and wait for your pull request to be reviewed and merged.
+
+Here are a few things you can do that will increase the likelihood of your pull request being accepted:
+
+- Follow the [style guide][style].
+- Write tests.
+- Keep your change as focused as possible. If there are multiple changes you would like to make that are not dependent upon each other, consider submitting them as separate pull requests.
+- Write a [good commit message](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).
+
+## Resources
+
+- [How to Contribute to Open Source](https://opensource.guide/how-to-contribute/)
+- [Using Pull Requests](https://help.github.com/articles/about-pull-requests/)
+- [GitHub Help](https://help.github.com)
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 GitHub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.